/src/simdjson/src/icelake.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | #ifndef SIMDJSON_SRC_ICELAKE_CPP |
2 | | #define SIMDJSON_SRC_ICELAKE_CPP |
3 | | |
4 | | #ifndef SIMDJSON_CONDITIONAL_INCLUDE |
5 | | #include <base.h> |
6 | | #endif // SIMDJSON_CONDITIONAL_INCLUDE |
7 | | |
8 | | #include <simdjson/icelake.h> |
9 | | #include <simdjson/icelake/implementation.h> |
10 | | |
11 | | // defining SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER allows us to provide our own bit_indexer::write |
12 | | #define SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER |
13 | | |
14 | | #include <simdjson/icelake/begin.h> |
15 | | #include <generic/amalgamated.h> |
16 | | #include <generic/stage1/amalgamated.h> |
17 | | #include <generic/stage2/amalgamated.h> |
18 | | |
19 | | #undef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER |
20 | | |
21 | | // |
22 | | // Stage 1 |
23 | | // |
24 | | |
25 | | namespace simdjson { |
26 | | namespace icelake { |
27 | | |
28 | | simdjson_warn_unused error_code implementation::create_dom_parser_implementation( /* Allocates an icelake dom_parser_implementation into dst and applies capacity and max_depth to it; returns MEMALLOC, the first error from a setter, or SUCCESS. */ |
29 | | size_t capacity, /* forwarded to dst->set_capacity() */ |
30 | | size_t max_depth, /* forwarded to dst->set_max_depth() */ |
31 | | std::unique_ptr<internal::dom_parser_implementation>& dst /* out: reset to the newly allocated parser (null if the allocation failed) */ |
32 | 0 | ) const noexcept { |
33 | 0 | dst.reset( new (std::nothrow) dom_parser_implementation() ); /* nothrow new: an allocation failure yields nullptr instead of throwing */ |
34 | 0 | if (!dst) { return MEMALLOC; } |
35 | 0 | if (auto err = dst->set_capacity(capacity)) /* any error reported by the setter is returned unchanged; dst is not cleared on this path */ |
36 | 0 | return err; |
37 | 0 | if (auto err = dst->set_max_depth(max_depth)) |
38 | 0 | return err; |
39 | 0 | return SUCCESS; |
40 | 0 | } |
41 | | |
42 | | namespace { |
43 | | |
44 | | using namespace simd; |
45 | | |
46 | | // This identifies structural characters (comma, colon, braces, brackets), |
47 | | // and ASCII white-space ('\r','\n','\t',' '), returning one 64-bit mask per class (whitespace, op). |
48 | 0 | simdjson_inline json_character_block json_character_block::classify(const simd::simd8x64<uint8_t>& in) { |
49 | | // These lookups rely on _mm512_shuffle_epi8 selecting by the lower 4 bits of any byte < 128 (and yielding 0 |
50 | | // for bytes with the top bit set), which is why we can't use the generic lookup_16. |
51 | 0 | const auto whitespace_table = simd8<uint8_t>::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100); |
52 | | /* Slots 0, 9, 10 and 13 hold the whitespace byte with that low nibble; every other slot holds a filler whose own low nibble differs from its slot, so it can never equal the byte that selected it. */ |
53 | | // The 6 operators (:,[]{}) have these values: |
54 | | // |
55 | | // , 2C |
56 | | // : 3A |
57 | | // [ 5B |
58 | | // { 7B |
59 | | // ] 5D |
60 | | // } 7D |
61 | | // |
62 | | // If you use | 0x20 to turn [ and ] into { and }, the lower 4 bits of each character is unique. |
63 | | // We exploit this, using a simd 4-bit lookup to tell us which character to match against, and then |
64 | | // match it (against | 0x20). |
65 | | // |
66 | | // To prevent recognizing other characters, everything else gets compared with 0, which cannot |
67 | | // match due to the | 0x20. |
68 | | // |
69 | | // NOTE: Due to the | 0x20, this ALSO treats <FF> and <SUB> (control characters 0C and 1A) like , |
70 | | // and :. This gets caught in stage 2, which checks the actual character to ensure the right |
71 | | // operators are in the right places. |
72 | 0 | const auto op_table = simd8<uint8_t>::repeat_16( |
73 | 0 | 0, 0, 0, 0, |
74 | 0 | 0, 0, 0, 0, |
75 | 0 | 0, 0, ':', '{', // : = 3A, [ = 5B, { = 7B |
76 | 0 | ',', '}', 0, 0 // , = 2C, ] = 5D, } = 7D |
77 | 0 | ); |
78 | | |
79 | | // We compute whitespace and op separately. If later code only uses one or the |
80 | | // other, given the fact that all functions are aggressively inlined, we can |
81 | | // hope that useless computations will be omitted. This is notably the case when |
82 | | // minifying (we only need whitespace). |
83 |
|
84 | 0 | const uint64_t whitespace = in.eq({ |
85 | 0 | _mm512_shuffle_epi8(whitespace_table, in.chunks[0]) |
86 | 0 | }); |
87 | | // Turn [ and ] into { and } |
88 | 0 | const simd8x64<uint8_t> curlified{ |
89 | 0 | in.chunks[0] | 0x20 |
90 | 0 | }; |
91 | 0 | const uint64_t op = curlified.eq({ |
92 | 0 | _mm512_shuffle_epi8(op_table, in.chunks[0]) |
93 | 0 | }); |
94 |
|
95 | 0 | return { whitespace, op }; |
96 | 0 | } |
97 | | |
98 | 0 | simdjson_inline bool is_ascii(const simd8x64<uint8_t>& input) { /* Reports whether the 64-byte block is ASCII by testing its OR-reduction. */ |
99 | 0 | return input.reduce_or().is_ascii(); /* NOTE(review): reduce_or() and is_ascii() are defined in the simd headers, not shown here; presumably a byte-wise OR followed by a top-bit test — confirm */ |
100 | 0 | } |
101 | | |
102 | 0 | simdjson_unused simdjson_inline simd8<bool> must_be_continuation(const simd8<uint8_t> prev1, const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) { /* True in each lane where prev1 >= 0xc0, prev2 >= 0xe0 or prev3 >= 0xf0 (presumably prevN is the input shifted back by N bytes — confirm at the caller). */ |
103 | 0 | simd8<uint8_t> is_second_byte = prev1.saturating_sub(0xc0u-1); // Only 11______ will be > 0 |
104 | 0 | simd8<uint8_t> is_third_byte = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0 |
105 | 0 | simd8<uint8_t> is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0 |
106 | 0 | // Caller requires a bool (all 1's). Each subtraction result is <= 64 (0x40, 0x20 and 0x10 at most), so their OR stays below 128 and the signed comparison is fine. |
107 | 0 | return simd8<int8_t>(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); |
108 | 0 | } |
109 | | |
110 | 0 | simdjson_inline simd8<bool> must_be_2_3_continuation(const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) { /* True in each lane where prev2 >= 0xe0 or prev3 >= 0xf0 (presumably prevN is the input shifted back by N bytes — confirm at the caller). */ |
111 | 0 | simd8<uint8_t> is_third_byte = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0 |
112 | 0 | simd8<uint8_t> is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0 |
113 | | // Caller requires a bool (all 1's). Each subtraction result is <= 32 (0x20 and 0x10 at most), so their OR stays below 128 and the signed comparison is fine. |
114 | 0 | return simd8<int8_t>(is_third_byte | is_fourth_byte) > int8_t(0); |
115 | 0 | } |
116 | | |
117 | | } // unnamed namespace |
118 | | } // namespace icelake |
119 | | } // namespace simdjson |
120 | | |
121 | | /** |
122 | | * We provide a custom version of bit_indexer::write using |
123 | | * naked intrinsics. |
124 | | * TODO: make this code more elegant. |
125 | | */ |
126 | | // Under GCC 12, the intrinsic _mm512_extracti32x4_epi32 may generate 'maybe uninitialized' warnings. |
127 | | // As a workaround, we disable warnings within the following function. |
128 | | SIMDJSON_PUSH_DISABLE_ALL_WARNINGS |
129 | | namespace simdjson { namespace icelake { namespace { namespace stage1 { |
130 | 0 | simdjson_inline void bit_indexer::write(uint32_t idx, uint64_t bits) { /* For each set bit in bits, from lowest to highest, stores idx + bit position at this->tail, then advances tail by the number of set bits. */ |
131 | | // In some instances, the next branch is expensive because it is mispredicted. |
132 | | // Unfortunately, in other cases, |
133 | | // it helps tremendously. |
134 | 0 | if (bits == 0) { return; } |
135 | | |
136 | 0 | const __m512i indexes = _mm512_maskz_compress_epi8(bits, _mm512_set_epi32( /* the constant holds byte values 0..63 in lane order; the masked compress packs the positions of the set bits into the low bytes and zeroes the rest */ |
137 | 0 | 0x3f3e3d3c, 0x3b3a3938, 0x37363534, 0x33323130, |
138 | 0 | 0x2f2e2d2c, 0x2b2a2928, 0x27262524, 0x23222120, |
139 | 0 | 0x1f1e1d1c, 0x1b1a1918, 0x17161514, 0x13121110, |
140 | 0 | 0x0f0e0d0c, 0x0b0a0908, 0x07060504, 0x03020100 |
141 | 0 | )); |
142 | 0 | const __m512i start_index = _mm512_set1_epi32(idx); /* idx broadcast to all sixteen 32-bit lanes */ |
143 |
|
144 | 0 | const auto count = count_ones(bits); /* count_ones is defined elsewhere; presumably the population count of bits */ |
145 | 0 | __m512i t0 = _mm512_cvtepu8_epi32(_mm512_castsi512_si128(indexes)); /* widen the first 16 packed positions from 8 to 32 bits */ |
146 | 0 | _mm512_storeu_si512(this->tail, _mm512_add_epi32(t0, start_index)); /* always stores a full 64 bytes, even when count < 16, so the buffer behind tail needs that much slack (assumes tail points to 32-bit elements — confirm against bit_indexer) */ |
147 |
|
148 | 0 | if(count > 16) { /* positions 16..31 are needed only when more than 16 bits are set; the same pattern repeats for 32 and 48 */ |
149 | 0 | const __m512i t1 = _mm512_cvtepu8_epi32(_mm512_extracti32x4_epi32(indexes, 1)); |
150 | 0 | _mm512_storeu_si512(this->tail + 16, _mm512_add_epi32(t1, start_index)); |
151 | 0 | if(count > 32) { |
152 | 0 | const __m512i t2 = _mm512_cvtepu8_epi32(_mm512_extracti32x4_epi32(indexes, 2)); |
153 | 0 | _mm512_storeu_si512(this->tail + 32, _mm512_add_epi32(t2, start_index)); |
154 | 0 | if(count > 48) { |
155 | 0 | const __m512i t3 = _mm512_cvtepu8_epi32(_mm512_extracti32x4_epi32(indexes, 3)); |
156 | 0 | _mm512_storeu_si512(this->tail + 48, _mm512_add_epi32(t3, start_index)); |
157 | 0 | } |
158 | 0 | } |
159 | 0 | } |
160 | 0 | this->tail += count; /* only count entries are kept; anything stored beyond them is not counted */ |
161 | 0 | } |
162 | | }}}} |
163 | | SIMDJSON_POP_DISABLE_WARNINGS |
164 | | |
165 | | // |
166 | | // Stage 2 |
167 | | // |
168 | | |
169 | | // |
170 | | // Implementation-specific overrides |
171 | | // |
172 | | namespace simdjson { |
173 | | namespace icelake { |
174 | | |
175 | 0 | simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { /* Forwards buf, len, dst and dst_len to the generic stage1 minifier built for icelake and returns its error_code. */ |
176 | 0 | return icelake::stage1::json_minifier::minify<128>(buf, len, dst, dst_len); /* NOTE(review): 128 is presumably the step size in bytes per iteration, and dst_len presumably receives the output length — confirm in generic/stage1 */ |
177 | 0 | } |
178 | | |
179 | 0 | simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept { /* Records the input buffer on the parser, then runs the generic structural indexer over it and returns its error_code. */ |
180 | 0 | this->buf = _buf; /* the pointer is stored, not copied; the bytes must stay valid for as long as the parser reads them */ |
181 | 0 | this->len = _len; |
182 | 0 | return icelake::stage1::json_structural_indexer::index<128>(_buf, _len, *this, streaming); /* NOTE(review): 128 is presumably the step size in bytes per iteration — confirm in generic/stage1 */ |
183 | 0 | } |
184 | | |
185 | 0 | simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { /* Forwards buf and len to the generic UTF-8 validator built for icelake and returns its result. */ |
186 | 0 | return icelake::stage1::generic_validate_utf8(buf,len); |
187 | 0 | } |
188 | | |
189 | 0 | simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { /* Runs the generic tape builder over this parser's state, writing into _doc, and returns its error_code. */ |
190 | 0 | return stage2::tape_builder::parse_document<false>(*this, _doc); /* NOTE(review): the <false> argument is presumably the streaming flag (stage2_next passes <true>) — confirm in generic/stage2 */ |
191 | 0 | } |
192 | | |
193 | 0 | simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { /* Same call as stage2(), but instantiates the tape builder with <true>. */ |
194 | 0 | return stage2::tape_builder::parse_document<true>(*this, _doc); /* NOTE(review): the <true> argument is presumably the streaming flag — confirm in generic/stage2 */ |
195 | 0 | } |
196 | | |
197 | 0 | simdjson_warn_unused uint8_t *dom_parser_implementation::parse_string(const uint8_t *src, uint8_t *dst, bool replacement_char) const noexcept { /* Forwards all three arguments to the icelake build of stringparsing::parse_string and returns its pointer result unchanged. */ |
198 | 0 | return icelake::stringparsing::parse_string(src, dst, replacement_char); |
199 | 0 | } |
200 | | |
201 | 0 | simdjson_warn_unused uint8_t *dom_parser_implementation::parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept { /* Forwards src and dst to the icelake build of stringparsing::parse_wobbly_string and returns its pointer result unchanged. */ |
202 | 0 | return icelake::stringparsing::parse_wobbly_string(src, dst); |
203 | 0 | } |
204 | | |
205 | 0 | simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { /* Full parse: stage1 in regular mode over the buffer, then stage2 into _doc. */ |
206 | 0 | auto error = stage1(_buf, _len, stage1_mode::regular); |
207 | 0 | if (error) { return error; } /* a stage1 failure is returned as-is and stage2 is skipped */ |
208 | 0 | return stage2(_doc); |
209 | 0 | } |
210 | | |
211 | | } // namespace icelake |
212 | | } // namespace simdjson |
213 | | |
214 | | #include <simdjson/icelake/end.h> |
215 | | |
216 | | #endif // SIMDJSON_SRC_ICELAKE_CPP |