Coverage Report

Created: 2025-07-23 06:33

/src/simdjson/include/simdjson/dom/parser-inl.h
Line
Count
Source (jump to first uncovered line)
1
#ifndef SIMDJSON_PARSER_INL_H
2
#define SIMDJSON_PARSER_INL_H
3
4
#include "simdjson/dom/base.h"
5
#include "simdjson/dom/document_stream.h"
6
#include "simdjson/implementation.h"
7
#include "simdjson/internal/dom_parser_implementation.h"
8
9
#include "simdjson/error-inl.h"
10
#include "simdjson/padded_string-inl.h"
11
#include "simdjson/dom/document_stream-inl.h"
12
#include "simdjson/dom/element-inl.h"
13
14
#include <climits>
15
#include <cstring> /* memcmp */
16
17
namespace simdjson {
18
namespace dom {
19
20
//
21
// parser inline implementation
22
//
23
// Build a parser whose capacity ceiling is `max_capacity` (see ensure_capacity(),
// which reports CAPACITY when a request exceeds max_capacity()).
// The internal byte buffer `loaded_bytes` starts out empty.
simdjson_inline parser::parser(size_t max_capacity) noexcept
  : _max_capacity(max_capacity),
    loaded_bytes(nullptr)
{}
27
144k
// Move constructor: compiler-generated member-wise move from `other`.
simdjson_inline parser::parser(parser &&other) noexcept = default;
28
271k
// Move assignment: compiler-generated member-wise move from `other`.
simdjson_inline parser &parser::operator=(parser &&other) noexcept = default;
29
30
0
// Report the parser's `valid` flag.
inline bool parser::is_valid() const noexcept {
  return valid;
}
31
0
inline int parser::get_error_code() const noexcept { return error; }
32
0
inline std::string parser::get_error_message() const noexcept { return error_message(error); }
33
34
0
inline bool parser::dump_raw_tape(std::ostream &os) const noexcept {
35
0
  return valid ? doc.dump_raw_tape(os) : false;
36
0
}
37
38
0
// Read the whole file at `path` into the parser's `loaded_bytes` buffer.
//
// The buffer is (re)allocated with internal::allocate_padded_buffer() when it is
// missing or smaller than the file; otherwise the existing buffer is reused.
//
// Returns the number of bytes read on success, IO_ERROR when the file cannot be
// opened, sized, fully read or closed, and MEMALLOC when the buffer cannot be
// allocated. The file handle is closed on every path.
inline simdjson_result<size_t> parser::read_file(const std::string &path) noexcept {
  // Open the file
  SIMDJSON_PUSH_DISABLE_WARNINGS
  SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe
  std::FILE *fp = std::fopen(path.c_str(), "rb");
  SIMDJSON_POP_DISABLE_WARNINGS

  if (fp == nullptr) {
    return IO_ERROR;
  }

  // Get the file size: seek to the end, then ask for the position.
  int ret;
#if SIMDJSON_VISUAL_STUDIO && !SIMDJSON_IS_32BITS
  ret = _fseeki64(fp, 0, SEEK_END);
#else
  ret = std::fseek(fp, 0, SEEK_END);
#endif // SIMDJSON_VISUAL_STUDIO && !SIMDJSON_IS_32BITS
  // fseek reports failure with any non-zero value, not necessarily a negative one.
  if(ret != 0) {
    std::fclose(fp);
    return IO_ERROR;
  }
#if SIMDJSON_VISUAL_STUDIO && !SIMDJSON_IS_32BITS
  __int64 len = _ftelli64(fp);
  if(len == -1L) {
    std::fclose(fp);
    return IO_ERROR;
  }
#else
  long len = std::ftell(fp);
  // A negative position is an error; LONG_MAX is also rejected as not being a usable size.
  if((len < 0) || (len == LONG_MAX)) {
    std::fclose(fp);
    return IO_ERROR;
  }
#endif
  const size_t file_size = size_t(len);

  // Make sure we have enough capacity to load the file.
  // The buffer pointer is checked as well as the recorded capacity (same test as in
  // parse_into_document): the recorded capacity alone does not prove that a buffer
  // is present, e.g. presumably after this parser has been moved from.
  if (!loaded_bytes || _loaded_bytes_capacity < file_size) {
    loaded_bytes.reset( internal::allocate_padded_buffer(file_size) );
    if (!loaded_bytes) {
      std::fclose(fp);
      return MEMALLOC;
    }
    _loaded_bytes_capacity = file_size;
  }

  // Read the string
  std::rewind(fp);
  const size_t bytes_read = std::fread(loaded_bytes.get(), 1, file_size, fp);
  // Close unconditionally (left operand), then treat a short read as an I/O error.
  if (std::fclose(fp) != 0 || bytes_read != file_size) {
    return IO_ERROR;
  }

  return bytes_read;
}
93
94
0
// Load and parse the file at `path` into the parser's own document (`doc`).
// Simply delegates to load_into_document().
inline simdjson_result<element> parser::load(const std::string &path) & noexcept {
  document &target = doc;
  return load_into_document(target, path);
}
97
98
0
// Read the file at `path` into `loaded_bytes` via read_file(), then parse those
// bytes into `provided_doc`. Any read_file() error is returned unchanged.
inline simdjson_result<element> parser::load_into_document(document& provided_doc, const std::string &path) & noexcept {
  size_t file_length;
  const auto read_status = read_file(path).get(file_length);
  if (read_status) {
    return read_status;
  }
  // No extra copy is requested: the bytes already live in the parser's own buffer.
  return parse_into_document(provided_doc, loaded_bytes.get(), file_length, false);
}
104
105
0
// Read the file at `path` into `loaded_bytes` via read_file() and return a
// document_stream over those bytes. Any read_file() error is returned unchanged.
// `batch_size` is raised to MINIMAL_BATCH_SIZE when it is smaller.
inline simdjson_result<document_stream> parser::load_many(const std::string &path, size_t batch_size) noexcept {
  size_t file_length;
  const auto read_status = read_file(path).get(file_length);
  if (read_status) {
    return read_status;
  }
  batch_size = (batch_size < MINIMAL_BATCH_SIZE) ? MINIMAL_BATCH_SIZE : batch_size;
  const uint8_t *file_bytes = reinterpret_cast<const uint8_t*>(loaded_bytes.get());
  return document_stream(*this, file_bytes, file_length, batch_size);
}
112
113
0
// Parse `len` bytes starting at `buf` into `provided_doc` and return its root element.
//
// Steps, in order:
//   1. ensure_capacity() for both the parser and `provided_doc`
//      (note that `provided_doc` may be the parser's own internal `doc`);
//   2. when `realloc_if_needed` is set, copy the input into the parser's padded
//      `loaded_bytes` buffer (growing it if required) and parse from the copy;
//   3. skip a leading UTF-8 byte-order mark (EF BB BF) if present;
//   4. run the active implementation's parse().
// Returns the first error encountered, or MEMALLOC if the copy buffer cannot be allocated.
inline simdjson_result<element> parser::parse_into_document(document& provided_doc, const uint8_t *buf, size_t len, bool realloc_if_needed) & noexcept {
  error_code status = ensure_capacity(provided_doc, len);
  if (status) {
    return status;
  }

  if (realloc_if_needed) {
    // Make sure we have enough capacity to copy len bytes
    const bool buffer_unusable = !loaded_bytes || _loaded_bytes_capacity < len;
    if (buffer_unusable) {
      loaded_bytes.reset( internal::allocate_padded_buffer(len) );
      if (!loaded_bytes) {
        return MEMALLOC;
      }
      _loaded_bytes_capacity = len;
    }
    char *copy_target = loaded_bytes.get();
    std::memcpy(static_cast<void *>(copy_target), buf, len);
    buf = reinterpret_cast<const uint8_t*>(copy_target);
  }

  // Skip the UTF-8 byte-order mark, if any.
  const bool starts_with_bom = (len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0);
  if (starts_with_bom) {
    buf += 3;
    len -= 3;
  }

  status = implementation->parse(buf, len, provided_doc);
  if (status) {
    return status;
  }
  return provided_doc.root();
}
141
142
0
// `const char *` convenience overload: reinterprets the buffer as bytes and
// forwards to the `const uint8_t *` overload.
simdjson_inline simdjson_result<element> parser::parse_into_document(document& provided_doc, const char *buf, size_t len, bool realloc_if_needed) & noexcept {
  const uint8_t *bytes = reinterpret_cast<const uint8_t *>(buf);
  return parse_into_document(provided_doc, bytes, len, realloc_if_needed);
}
145
0
// `std::string` overload. The input is copied into the parser's padded buffer
// only when the string's spare capacity is smaller than SIMDJSON_PADDING.
simdjson_inline simdjson_result<element> parser::parse_into_document(document& provided_doc, const std::string &s) & noexcept {
  const bool lacks_padding = (s.capacity() - s.length()) < SIMDJSON_PADDING;
  return parse_into_document(provided_doc, s.data(), s.length(), lacks_padding);
}
148
0
// `padded_string` overload: parses the string's bytes in place, never requesting a copy.
simdjson_inline simdjson_result<element> parser::parse_into_document(document& provided_doc, const padded_string &s) & noexcept {
  const auto text = s.data();
  const auto text_length = s.length();
  return parse_into_document(provided_doc, text, text_length, false);
}
151
152
153
101k
// Parse `len` bytes at `buf` into the parser's own document (`doc`).
// Delegates to parse_into_document().
inline simdjson_result<element> parser::parse(const uint8_t *buf, size_t len, bool realloc_if_needed) & noexcept {
  document &target = doc;
  return parse_into_document(target, buf, len, realloc_if_needed);
}
156
157
31.1k
// `const char *` convenience overload: reinterprets the buffer as bytes and
// forwards to the `const uint8_t *` overload.
simdjson_inline simdjson_result<element> parser::parse(const char *buf, size_t len, bool realloc_if_needed) & noexcept {
  const uint8_t *bytes = reinterpret_cast<const uint8_t *>(buf);
  return parse(bytes, len, realloc_if_needed);
}
160
10.6k
// `std::string` overload. The input is copied into the parser's padded buffer
// only when the string's spare capacity is smaller than SIMDJSON_PADDING.
simdjson_inline simdjson_result<element> parser::parse(const std::string &s) & noexcept {
  const bool lacks_padding = (s.capacity() - s.length()) < SIMDJSON_PADDING;
  return parse(s.data(), s.length(), lacks_padding);
}
163
20.5k
// `padded_string` overload: parses the string's bytes in place, never requesting a copy.
simdjson_inline simdjson_result<element> parser::parse(const padded_string &s) & noexcept {
  const auto text = s.data();
  const auto text_length = s.length();
  return parse(text, text_length, false);
}
166
0
// `padded_string_view` overload: parses the viewed bytes in place, never requesting a copy.
simdjson_inline simdjson_result<element> parser::parse(const padded_string_view &v) & noexcept {
  const auto text = v.data();
  const auto text_length = v.length();
  return parse(text, text_length, false);
}
169
170
0
// Create a document_stream over `len` bytes at `buf`.
// `batch_size` is raised to MINIMAL_BATCH_SIZE when it is smaller, and a leading
// UTF-8 byte-order mark (EF BB BF) is skipped before the stream is built.
inline simdjson_result<document_stream> parser::parse_many(const uint8_t *buf, size_t len, size_t batch_size) noexcept {
  batch_size = (batch_size < MINIMAL_BATCH_SIZE) ? MINIMAL_BATCH_SIZE : batch_size;

  const bool starts_with_bom = (len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0);
  if (starts_with_bom) {
    buf += 3;
    len -= 3;
  }
  return document_stream(*this, buf, len, batch_size);
}
178
17.8k
// `const char *` convenience overload: reinterprets the buffer as bytes and
// forwards to the `const uint8_t *` overload.
inline simdjson_result<document_stream> parser::parse_many(const char *buf, size_t len, size_t batch_size) noexcept {
  const uint8_t *bytes = reinterpret_cast<const uint8_t *>(buf);
  return parse_many(bytes, len, batch_size);
}
181
0
// `std::string` overload: forwards the string's data pointer and length unchanged.
// NOTE(review): unlike parse(const std::string &), no padding check or copy is done here.
inline simdjson_result<document_stream> parser::parse_many(const std::string &s, size_t batch_size) noexcept {
  const char *text = s.data();
  const size_t text_length = s.length();
  return parse_many(text, text_length, batch_size);
}
184
17.8k
// `padded_string` overload: forwards the string's data pointer and length unchanged.
inline simdjson_result<document_stream> parser::parse_many(const padded_string &s, size_t batch_size) noexcept {
  const auto text = s.data();
  const auto text_length = s.length();
  return parse_many(text, text_length, batch_size);
}
187
188
232k
// Current capacity as reported by the implementation object,
// or 0 when no implementation has been created yet.
simdjson_inline size_t parser::capacity() const noexcept {
  if (!implementation) {
    return 0;
  }
  return implementation->capacity();
}
191
111k
// The configured capacity ceiling (`_max_capacity`).
simdjson_inline size_t parser::max_capacity() const noexcept {
  const size_t ceiling = _max_capacity;
  return ceiling;
}
194
111k
// Maximum depth as reported by the implementation object,
// or DEFAULT_MAX_DEPTH when no implementation has been created yet.
simdjson_pure simdjson_inline size_t parser::max_depth() const noexcept {
  if (!implementation) {
    return DEFAULT_MAX_DEPTH;
  }
  return implementation->max_depth();
}
197
198
// (Re)allocate the parser implementation for the given `capacity` and `max_depth`.
// When no implementation exists yet, one is created through the active
// implementation; otherwise the existing one is asked to reallocate.
// Returns the error reported by that call, or SUCCESS.
simdjson_warn_unused
inline error_code parser::allocate(size_t capacity, size_t max_depth) noexcept {
  error_code status;
  if (!implementation) {
    // First use: create the implementation object.
    status = simdjson::get_active_implementation()->create_dom_parser_implementation(capacity, max_depth, implementation);
  } else {
    // Reuse the existing implementation object.
    status = implementation->allocate(capacity, max_depth);
  }
  return status ? status : SUCCESS;
}
212
213
#ifndef SIMDJSON_DISABLE_DEPRECATED_API
214
// Boolean wrapper around allocate(): true when allocate() reports no error.
// Only compiled when SIMDJSON_DISABLE_DEPRECATED_API is not defined.
simdjson_warn_unused
inline bool parser::allocate_capacity(size_t capacity, size_t max_depth) noexcept {
  const error_code status = allocate(capacity, max_depth);
  return !status;
}
218
#endif // SIMDJSON_DISABLE_DEPRECATED_API
219
220
19.2k
// Ensure capacity for the parser and its own document (`doc`).
// Delegates to the two-argument overload.
inline error_code parser::ensure_capacity(size_t desired_capacity) noexcept {
  document &target = doc;
  return ensure_capacity(target, desired_capacity);
}
223
224
225
120k
// Make sure both the parser and `target_document` can hold `desired_capacity` bytes,
// growing whichever of the two is too small.
//
// Returns SUCCESS, CAPACITY when the request exceeds max_capacity(), or the error
// reported by the failing allocation. Every error returned here is also stored in `error`.
inline error_code parser::ensure_capacity(document& target_document, size_t desired_capacity) noexcept {
  // 1. It is wasteful to allocate a document and a parser for documents spanning less than MINIMAL_DOCUMENT_CAPACITY bytes.
  // 2. If we allow desired_capacity = 0 then it is possible to exit this function with implementation == nullptr.
  if (desired_capacity < MINIMAL_DOCUMENT_CAPACITY) {
    desired_capacity = MINIMAL_DOCUMENT_CAPACITY;
  }
  // The parser check and the document check share one branch so that the common
  // "already large enough" case costs a single unlikely test.
  //
  // Note: capacity() == 0 always enters the branch, because desired_capacity > 0 here.
  if (simdjson_unlikely(capacity() < desired_capacity || target_document.capacity() < desired_capacity)) {
    if (desired_capacity > max_capacity()) {
      error = CAPACITY;
      return error;
    }
    // Both allocations are attempted before either result is inspected.
    error_code document_status = SUCCESS;
    if (target_document.capacity() < desired_capacity) {
      document_status = target_document.allocate(desired_capacity);
    }
    error_code parser_status = SUCCESS;
    if (capacity() < desired_capacity) {
      parser_status = allocate(desired_capacity, max_depth());
    }
    // A document failure takes precedence over a parser failure.
    if (document_status != SUCCESS) {
      error = document_status;
      return error;
    }
    if (parser_status != SUCCESS) {
      error = parser_status;
      return error;
    }
  }
  return SUCCESS;
}
Unexecuted instantiation: simdjson::dom::parser::ensure_capacity(simdjson::dom::document&, unsigned long)
simdjson::dom::parser::ensure_capacity(simdjson::dom::document&, unsigned long)
Line
Count
Source
225
120k
// Make sure both the parser and `target_document` can hold `desired_capacity` bytes,
// growing whichever of the two is too small.
//
// Returns SUCCESS, CAPACITY when the request exceeds max_capacity(), or the error
// reported by the failing allocation. Every error returned here is also stored in `error`.
inline error_code parser::ensure_capacity(document& target_document, size_t desired_capacity) noexcept {
  // 1. It is wasteful to allocate a document and a parser for documents spanning less than MINIMAL_DOCUMENT_CAPACITY bytes.
  // 2. If we allow desired_capacity = 0 then it is possible to exit this function with implementation == nullptr.
  if (desired_capacity < MINIMAL_DOCUMENT_CAPACITY) {
    desired_capacity = MINIMAL_DOCUMENT_CAPACITY;
  }
  // The parser check and the document check share one branch so that the common
  // "already large enough" case costs a single unlikely test.
  //
  // Note: capacity() == 0 always enters the branch, because desired_capacity > 0 here.
  if (simdjson_unlikely(capacity() < desired_capacity || target_document.capacity() < desired_capacity)) {
    if (desired_capacity > max_capacity()) {
      error = CAPACITY;
      return error;
    }
    // Both allocations are attempted before either result is inspected.
    error_code document_status = SUCCESS;
    if (target_document.capacity() < desired_capacity) {
      document_status = target_document.allocate(desired_capacity);
    }
    error_code parser_status = SUCCESS;
    if (capacity() < desired_capacity) {
      parser_status = allocate(desired_capacity, max_depth());
    }
    // A document failure takes precedence over a parser failure.
    if (document_status != SUCCESS) {
      error = document_status;
      return error;
    }
    if (parser_status != SUCCESS) {
      error = parser_status;
      return error;
    }
  }
  return SUCCESS;
}
246
247
0
// Set the capacity ceiling. Values at or below MINIMAL_DOCUMENT_CAPACITY are
// replaced by MINIMAL_DOCUMENT_CAPACITY, so the ceiling never drops beneath it.
simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept {
  _max_capacity = (max_capacity > MINIMAL_DOCUMENT_CAPACITY) ? max_capacity : MINIMAL_DOCUMENT_CAPACITY;
}
254
255
} // namespace dom
256
} // namespace simdjson
257
258
#endif // SIMDJSON_PARSER_INL_H