/src/simdutf/src/icelake/icelake_utf32_validation.inl.cpp
Line | Count | Source |
1 | | // file included directly |
2 | | |
3 | 0 | bool validate_utf32(const char32_t *buf, size_t len) { |
4 | 0 | if (simdutf_unlikely(len == 0)) { |
5 | 0 | return true; |
6 | 0 | } |
7 | 0 | const char32_t *end = buf + len; |
8 | |
|
9 | 0 | const __m512i offset = _mm512_set1_epi32((uint32_t)0xffff2000); |
10 | 0 | __m512i currentmax = _mm512_setzero_si512(); |
11 | 0 | __m512i currentoffsetmax = _mm512_setzero_si512(); |
12 | | |
13 | | // Optimized: Process 32 values (2x 512-bit) per iteration for better |
14 | | // throughput |
15 | 0 | while (end - buf >= 32) { |
16 | 0 | __m512i utf32_1 = _mm512_loadu_si512((const __m512i *)buf); |
17 | 0 | __m512i utf32_2 = _mm512_loadu_si512((const __m512i *)(buf + 16)); |
18 | 0 | buf += 32; |
19 | | |
20 | | // Process both blocks in parallel to maximize instruction-level parallelism |
21 | 0 | __m512i offsetmax_1 = _mm512_add_epi32(utf32_1, offset); |
22 | 0 | __m512i offsetmax_2 = _mm512_add_epi32(utf32_2, offset); |
23 | |
|
24 | 0 | currentoffsetmax = _mm512_max_epu32(offsetmax_1, currentoffsetmax); |
25 | 0 | currentmax = _mm512_max_epu32(utf32_1, currentmax); |
26 | |
|
27 | 0 | currentoffsetmax = _mm512_max_epu32(offsetmax_2, currentoffsetmax); |
28 | 0 | currentmax = _mm512_max_epu32(utf32_2, currentmax); |
29 | 0 | } |
30 | | |
31 | | // Handle remaining 16-31 values |
32 | 0 | if (end - buf >= 16) { |
33 | 0 | __m512i utf32 = _mm512_loadu_si512((const __m512i *)buf); |
34 | 0 | buf += 16; |
35 | 0 | currentoffsetmax = |
36 | 0 | _mm512_max_epu32(_mm512_add_epi32(utf32, offset), currentoffsetmax); |
37 | 0 | currentmax = _mm512_max_epu32(utf32, currentmax); |
38 | 0 | } |
39 | | |
40 | | // Handle remaining 0-15 values with masked load |
41 | 0 | if (buf < end) { |
42 | 0 | __m512i utf32 = |
43 | 0 | _mm512_maskz_loadu_epi32(__mmask16((1 << (end - buf)) - 1), buf); |
44 | 0 | currentoffsetmax = |
45 | 0 | _mm512_max_epu32(_mm512_add_epi32(utf32, offset), currentoffsetmax); |
46 | 0 | currentmax = _mm512_max_epu32(utf32, currentmax); |
47 | 0 | } |
48 | |
|
49 | 0 | const __m512i standardmax = _mm512_set1_epi32((uint32_t)0x10ffff); |
50 | 0 | const __m512i standardoffsetmax = _mm512_set1_epi32((uint32_t)0xfffff7ff); |
51 | 0 | const auto outside_range = _mm512_cmpgt_epu32_mask(currentmax, standardmax); |
52 | 0 | if (outside_range != 0) { |
53 | 0 | return false; |
54 | 0 | } |
55 | | |
56 | 0 | const auto surrogate = |
57 | 0 | _mm512_cmpgt_epu32_mask(currentoffsetmax, standardoffsetmax); |
58 | 0 | if (surrogate != 0) { |
59 | 0 | return false; |
60 | 0 | } |
61 | | |
62 | 0 | return true; |
63 | 0 | } |