/src/simdjson/include/simdjson/dom/parser-inl.h
Line | Count | Source (jump to first uncovered line) |
1 | | #ifndef SIMDJSON_PARSER_INL_H |
2 | | #define SIMDJSON_PARSER_INL_H |
3 | | |
4 | | #include "simdjson/dom/base.h" |
5 | | #include "simdjson/dom/document_stream.h" |
6 | | #include "simdjson/implementation.h" |
7 | | #include "simdjson/internal/dom_parser_implementation.h" |
8 | | |
9 | | #include "simdjson/error-inl.h" |
10 | | #include "simdjson/padded_string-inl.h" |
11 | | #include "simdjson/dom/document_stream-inl.h" |
12 | | #include "simdjson/dom/element-inl.h" |
13 | | |
14 | | #include <climits> |
15 | | #include <cstring> /* memcmp */ |
16 | | |
17 | | namespace simdjson { |
18 | | namespace dom { |
19 | | |
20 | | // |
21 | | // parser inline implementation |
22 | | // |
23 | | simdjson_inline parser::parser(size_t max_capacity) noexcept |
24 | 147k | : _max_capacity{max_capacity}, |
25 | 147k | loaded_bytes(nullptr) { |
26 | 147k | } |
27 | 144k | simdjson_inline parser::parser(parser &&other) noexcept = default; |
28 | 271k | simdjson_inline parser &parser::operator=(parser &&other) noexcept = default; |
29 | | |
30 | 0 | inline bool parser::is_valid() const noexcept { return valid; } |
31 | 0 | inline int parser::get_error_code() const noexcept { return error; } |
32 | 0 | inline std::string parser::get_error_message() const noexcept { return error_message(error); } |
33 | | |
34 | 0 | inline bool parser::dump_raw_tape(std::ostream &os) const noexcept { |
35 | 0 | return valid ? doc.dump_raw_tape(os) : false; |
36 | 0 | } |
37 | | |
38 | 0 | inline simdjson_result<size_t> parser::read_file(const std::string &path) noexcept { |
39 | 0 | // Open the file |
40 | 0 | SIMDJSON_PUSH_DISABLE_WARNINGS |
41 | 0 | SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe |
42 | 0 | std::FILE *fp = std::fopen(path.c_str(), "rb"); |
43 | 0 | SIMDJSON_POP_DISABLE_WARNINGS |
44 | 0 |
|
45 | 0 | if (fp == nullptr) { |
46 | 0 | return IO_ERROR; |
47 | 0 | } |
48 | 0 |
|
49 | 0 | // Get the file size |
50 | 0 | int ret; |
51 | 0 | #if SIMDJSON_VISUAL_STUDIO && !SIMDJSON_IS_32BITS |
52 | 0 | ret = _fseeki64(fp, 0, SEEK_END); |
53 | 0 | #else |
54 | 0 | ret = std::fseek(fp, 0, SEEK_END); |
55 | 0 | #endif // _WIN64 |
56 | 0 | if(ret < 0) { |
57 | 0 | std::fclose(fp); |
58 | 0 | return IO_ERROR; |
59 | 0 | } |
60 | 0 | #if SIMDJSON_VISUAL_STUDIO && !SIMDJSON_IS_32BITS |
61 | 0 | __int64 len = _ftelli64(fp); |
62 | 0 | if(len == -1L) { |
63 | 0 | std::fclose(fp); |
64 | 0 | return IO_ERROR; |
65 | 0 | } |
66 | 0 | #else |
67 | 0 | long len = std::ftell(fp); |
68 | 0 | if((len < 0) || (len == LONG_MAX)) { |
69 | 0 | std::fclose(fp); |
70 | 0 | return IO_ERROR; |
71 | 0 | } |
72 | 0 | #endif |
73 | 0 |
|
74 | 0 | // Make sure we have enough capacity to load the file |
75 | 0 | if (_loaded_bytes_capacity < size_t(len)) { |
76 | 0 | loaded_bytes.reset( internal::allocate_padded_buffer(len) ); |
77 | 0 | if (!loaded_bytes) { |
78 | 0 | std::fclose(fp); |
79 | 0 | return MEMALLOC; |
80 | 0 | } |
81 | 0 | _loaded_bytes_capacity = len; |
82 | 0 | } |
83 | 0 |
|
84 | 0 | // Read the string |
85 | 0 | std::rewind(fp); |
86 | 0 | size_t bytes_read = std::fread(loaded_bytes.get(), 1, len, fp); |
87 | 0 | if (std::fclose(fp) != 0 || bytes_read != size_t(len)) { |
88 | 0 | return IO_ERROR; |
89 | 0 | } |
90 | 0 |
|
91 | 0 | return bytes_read; |
92 | 0 | } |
93 | | |
94 | 0 | inline simdjson_result<element> parser::load(const std::string &path) & noexcept { |
95 | 0 | return load_into_document(doc, path); |
96 | 0 | } |
97 | | |
98 | 0 | inline simdjson_result<element> parser::load_into_document(document& provided_doc, const std::string &path) & noexcept { |
99 | 0 | size_t len; |
100 | 0 | auto _error = read_file(path).get(len); |
101 | 0 | if (_error) { return _error; } |
102 | 0 | return parse_into_document(provided_doc, loaded_bytes.get(), len, false); |
103 | 0 | } |
104 | | |
105 | 0 | inline simdjson_result<document_stream> parser::load_many(const std::string &path, size_t batch_size) noexcept { |
106 | 0 | size_t len; |
107 | 0 | auto _error = read_file(path).get(len); |
108 | 0 | if (_error) { return _error; } |
109 | 0 | if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } |
110 | 0 | return document_stream(*this, reinterpret_cast<const uint8_t*>(loaded_bytes.get()), len, batch_size); |
111 | 0 | } |
112 | | |
113 | 0 | inline simdjson_result<element> parser::parse_into_document(document& provided_doc, const uint8_t *buf, size_t len, bool realloc_if_needed) & noexcept { |
114 | 0 | // Important: we need to ensure that document has enough capacity. |
115 | 0 | // Important: It is possible that provided_doc is actually the internal 'doc' within the parser!!! |
116 | 0 | error_code _error = ensure_capacity(provided_doc, len); |
117 | 0 | if (_error) { return _error; } |
118 | 0 | if (realloc_if_needed) { |
119 | 0 | // Make sure we have enough capacity to copy len bytes |
120 | 0 | if (!loaded_bytes || _loaded_bytes_capacity < len) { |
121 | 0 | loaded_bytes.reset( internal::allocate_padded_buffer(len) ); |
122 | 0 | if (!loaded_bytes) { |
123 | 0 | return MEMALLOC; |
124 | 0 | } |
125 | 0 | _loaded_bytes_capacity = len; |
126 | 0 | } |
127 | 0 | std::memcpy(static_cast<void *>(loaded_bytes.get()), buf, len); |
128 | 0 | buf = reinterpret_cast<const uint8_t*>(loaded_bytes.get()); |
129 | 0 | } |
130 | 0 |
|
131 | 0 | if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { |
132 | 0 | buf += 3; |
133 | 0 | len -= 3; |
134 | 0 | } |
135 | 0 | _error = implementation->parse(buf, len, provided_doc); |
136 | 0 |
|
137 | 0 | if (_error) { return _error; } |
138 | 0 |
|
139 | 0 | return provided_doc.root(); |
140 | 0 | } |
141 | | |
142 | 0 | simdjson_inline simdjson_result<element> parser::parse_into_document(document& provided_doc, const char *buf, size_t len, bool realloc_if_needed) & noexcept { |
143 | 0 | return parse_into_document(provided_doc, reinterpret_cast<const uint8_t *>(buf), len, realloc_if_needed); |
144 | 0 | } |
145 | 0 | simdjson_inline simdjson_result<element> parser::parse_into_document(document& provided_doc, const std::string &s) & noexcept { |
146 | 0 | return parse_into_document(provided_doc, s.data(), s.length(), s.capacity() - s.length() < SIMDJSON_PADDING); |
147 | 0 | } |
148 | 0 | simdjson_inline simdjson_result<element> parser::parse_into_document(document& provided_doc, const padded_string &s) & noexcept { |
149 | 0 | return parse_into_document(provided_doc, s.data(), s.length(), false); |
150 | 0 | } |
151 | | |
152 | | |
153 | 101k | inline simdjson_result<element> parser::parse(const uint8_t *buf, size_t len, bool realloc_if_needed) & noexcept { |
154 | 101k | return parse_into_document(doc, buf, len, realloc_if_needed); |
155 | 101k | } |
156 | | |
157 | 31.1k | simdjson_inline simdjson_result<element> parser::parse(const char *buf, size_t len, bool realloc_if_needed) & noexcept { |
158 | 31.1k | return parse(reinterpret_cast<const uint8_t *>(buf), len, realloc_if_needed); |
159 | 31.1k | } |
160 | 10.6k | simdjson_inline simdjson_result<element> parser::parse(const std::string &s) & noexcept { |
161 | 10.6k | return parse(s.data(), s.length(), s.capacity() - s.length() < SIMDJSON_PADDING); |
162 | 10.6k | } |
163 | 20.5k | simdjson_inline simdjson_result<element> parser::parse(const padded_string &s) & noexcept { |
164 | 20.5k | return parse(s.data(), s.length(), false); |
165 | 20.5k | } |
166 | 0 | simdjson_inline simdjson_result<element> parser::parse(const padded_string_view &v) & noexcept { |
167 | 0 | return parse(v.data(), v.length(), false); |
168 | 0 | } |
169 | | |
170 | 0 | inline simdjson_result<document_stream> parser::parse_many(const uint8_t *buf, size_t len, size_t batch_size) noexcept { |
171 | 0 | if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } |
172 | 0 | if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { |
173 | 0 | buf += 3; |
174 | 0 | len -= 3; |
175 | 0 | } |
176 | 0 | return document_stream(*this, buf, len, batch_size); |
177 | 0 | } |
178 | 17.8k | inline simdjson_result<document_stream> parser::parse_many(const char *buf, size_t len, size_t batch_size) noexcept { |
179 | 17.8k | return parse_many(reinterpret_cast<const uint8_t *>(buf), len, batch_size); |
180 | 17.8k | } |
181 | 0 | inline simdjson_result<document_stream> parser::parse_many(const std::string &s, size_t batch_size) noexcept { |
182 | 0 | return parse_many(s.data(), s.length(), batch_size); |
183 | 0 | } |
184 | 17.8k | inline simdjson_result<document_stream> parser::parse_many(const padded_string &s, size_t batch_size) noexcept { |
185 | 17.8k | return parse_many(s.data(), s.length(), batch_size); |
186 | 17.8k | } |
187 | | |
188 | 232k | simdjson_inline size_t parser::capacity() const noexcept { |
189 | 232k | return implementation ? implementation->capacity() : 0; |
190 | 232k | } |
191 | 111k | simdjson_inline size_t parser::max_capacity() const noexcept { |
192 | 111k | return _max_capacity; |
193 | 111k | } |
194 | 111k | simdjson_pure simdjson_inline size_t parser::max_depth() const noexcept { |
195 | 111k | return implementation ? implementation->max_depth() : DEFAULT_MAX_DEPTH; |
196 | 111k | } |
197 | | |
198 | | simdjson_warn_unused |
199 | 111k | inline error_code parser::allocate(size_t capacity, size_t max_depth) noexcept { |
200 | | // |
201 | | // Reallocate implementation if needed |
202 | | // |
203 | 111k | error_code err; |
204 | 111k | if (implementation) { |
205 | 0 | err = implementation->allocate(capacity, max_depth); |
206 | 111k | } else { |
207 | 111k | err = simdjson::get_active_implementation()->create_dom_parser_implementation(capacity, max_depth, implementation); |
208 | 111k | } |
209 | 111k | if (err) { return err; } |
210 | 111k | return SUCCESS; |
211 | 111k | } |
212 | | |
213 | | #ifndef SIMDJSON_DISABLE_DEPRECATED_API |
214 | | simdjson_warn_unused |
215 | | inline bool parser::allocate_capacity(size_t capacity, size_t max_depth) noexcept { |
216 | | return !allocate(capacity, max_depth); |
217 | | } |
218 | | #endif // SIMDJSON_DISABLE_DEPRECATED_API |
219 | | |
220 | 19.2k | inline error_code parser::ensure_capacity(size_t desired_capacity) noexcept { |
221 | 19.2k | return ensure_capacity(doc, desired_capacity); |
222 | 19.2k | } |
223 | | |
224 | | |
225 | 120k | inline error_code parser::ensure_capacity(document& target_document, size_t desired_capacity) noexcept { |
226 | | // 1. It is wasteful to allocate a document and a parser for documents spanning less than MINIMAL_DOCUMENT_CAPACITY bytes. |
227 | | // 2. If we allow desired_capacity = 0 then it is possible to exit this function with implementation == nullptr. |
228 | 120k | if(desired_capacity < MINIMAL_DOCUMENT_CAPACITY) { desired_capacity = MINIMAL_DOCUMENT_CAPACITY; } |
229 | | // If we don't have enough capacity, (try to) automatically bump it. |
230 | | // If the document needs allocation, do it too. |
231 | | // Both in one if statement to minimize unlikely branching. |
232 | | // |
233 | | // Note: we must make sure that this function is called if capacity() == 0. We do so because we |
234 | | // ensure that desired_capacity > 0. |
235 | 120k | if (simdjson_unlikely(capacity() < desired_capacity || target_document.capacity() < desired_capacity)) { |
236 | 111k | if (desired_capacity > max_capacity()) { |
237 | 0 | return error = CAPACITY; |
238 | 0 | } |
239 | 111k | error_code err1 = target_document.capacity() < desired_capacity ? target_document.allocate(desired_capacity) : SUCCESS; |
240 | 111k | error_code err2 = capacity() < desired_capacity ? allocate(desired_capacity, max_depth()) : SUCCESS; |
241 | 111k | if(err1 != SUCCESS) { return error = err1; } |
242 | 111k | if(err2 != SUCCESS) { return error = err2; } |
243 | 111k | } |
244 | 120k | return SUCCESS; |
245 | 120k | } Unexecuted instantiation: simdjson::dom::parser::ensure_capacity(simdjson::dom::document&, unsigned long) simdjson::dom::parser::ensure_capacity(simdjson::dom::document&, unsigned long) Line | Count | Source | 225 | 120k | inline error_code parser::ensure_capacity(document& target_document, size_t desired_capacity) noexcept { | 226 | | // 1. It is wasteful to allocate a document and a parser for documents spanning less than MINIMAL_DOCUMENT_CAPACITY bytes. | 227 | | // 2. If we allow desired_capacity = 0 then it is possible to exit this function with implementation == nullptr. | 228 | 120k | if(desired_capacity < MINIMAL_DOCUMENT_CAPACITY) { desired_capacity = MINIMAL_DOCUMENT_CAPACITY; } | 229 | | // If we don't have enough capacity, (try to) automatically bump it. | 230 | | // If the document needs allocation, do it too. | 231 | | // Both in one if statement to minimize unlikely branching. | 232 | | // | 233 | | // Note: we must make sure that this function is called if capacity() == 0. We do so because we | 234 | | // ensure that desired_capacity > 0. | 235 | 120k | if (simdjson_unlikely(capacity() < desired_capacity || target_document.capacity() < desired_capacity)) { | 236 | 111k | if (desired_capacity > max_capacity()) { | 237 | 0 | return error = CAPACITY; | 238 | 0 | } | 239 | 111k | error_code err1 = target_document.capacity() < desired_capacity ? target_document.allocate(desired_capacity) : SUCCESS; | 240 | 111k | error_code err2 = capacity() < desired_capacity ? allocate(desired_capacity, max_depth()) : SUCCESS; | 241 | 111k | if(err1 != SUCCESS) { return error = err1; } | 242 | 111k | if(err2 != SUCCESS) { return error = err2; } | 243 | 111k | } | 244 | 120k | return SUCCESS; | 245 | 120k | } |
|
246 | | |
247 | 0 | simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { |
248 | 0 | if(max_capacity > MINIMAL_DOCUMENT_CAPACITY) { |
249 | 0 | _max_capacity = max_capacity; |
250 | 0 | } else { |
251 | 0 | _max_capacity = MINIMAL_DOCUMENT_CAPACITY; |
252 | 0 | } |
253 | 0 | } |
254 | | |
255 | | } // namespace dom |
256 | | } // namespace simdjson |
257 | | |
258 | | #endif // SIMDJSON_PARSER_INL_H |