/src/tomlplusplus/include/toml++/impl/parser.inl
Line | Count | Source |
1 | | //# This file is a part of toml++ and is subject to the terms of the MIT license. |
2 | | //# Copyright (c) Mark Gillard <mark.gillard@outlook.com.au> |
3 | | //# See https://github.com/marzer/tomlplusplus/blob/master/LICENSE for the full license text. |
4 | | // SPDX-License-Identifier: MIT |
5 | | #pragma once |
6 | | |
7 | | #include "preprocessor.hpp" |
8 | | //# {{ |
9 | | #if !TOML_IMPLEMENTATION |
10 | | #error This is an implementation-only header. |
11 | | #endif |
12 | | //# }} |
13 | | #if TOML_ENABLE_PARSER |
14 | | |
15 | | #include "parser.hpp" |
16 | | #include "std_optional.hpp" |
17 | | #include "source_region.hpp" |
18 | | #include "parse_error.hpp" |
19 | | #include "date_time.hpp" |
20 | | #include "value.hpp" |
21 | | #include "array.hpp" |
22 | | #include "table.hpp" |
23 | | #include "unicode.hpp" |
24 | | TOML_DISABLE_WARNINGS; |
25 | | #include <istream> |
26 | | #include <fstream> |
27 | | #if TOML_INT_CHARCONV || TOML_FLOAT_CHARCONV |
28 | | #include <charconv> |
29 | | #endif |
30 | | #if !TOML_INT_CHARCONV || !TOML_FLOAT_CHARCONV |
31 | | #include <sstream> |
32 | | #endif |
33 | | #if !TOML_INT_CHARCONV |
34 | | #include <iomanip> |
35 | | #endif |
36 | | TOML_ENABLE_WARNINGS; |
37 | | #include "header_start.hpp" |
38 | | |
39 | | //#--------------------------------------------------------------------------------------------------------------------- |
40 | | //# UTF8 STREAMS |
41 | | //#--------------------------------------------------------------------------------------------------------------------- |
42 | | |
43 | | TOML_ANON_NAMESPACE_START |
44 | | { |
// Primary template of the byte-stream adapter. It is only declared here; the usable
// definitions are the specializations for std::basic_string_view<Char> and std::istream.
45 | | template <typename T> |
46 | | class utf8_byte_stream; |
47 | | |
// The three-byte UTF-8 byte order mark (EF BB BF). The byte-stream constructors compare the
// first three bytes of their input against it.
48 | | TOML_INTERNAL_LINKAGE |
49 | | constexpr auto utf8_byte_order_mark = "\xEF\xBB\xBF"sv; |
50 | | |
// Byte-stream adapter over an in-memory string view whose character type is one byte wide.
// It keeps a read offset into the view and hands out raw bytes in caller-sized chunks.
51 | | template <typename Char> |
52 | | class utf8_byte_stream<std::basic_string_view<Char>> |
53 | | { |
54 | | static_assert(sizeof(Char) == 1); |
55 | | |
56 | | private: |
// source_ is the viewed input; position_ is the offset of the next unread byte.
57 | | std::basic_string_view<Char> source_; |
58 | | size_t position_ = {}; |
59 | | |
60 | | public: |
// Wraps sv. If the view starts with the byte order mark, reading begins just after it.
61 | | TOML_NODISCARD_CTOR |
62 | | explicit constexpr utf8_byte_stream(std::basic_string_view<Char> sv) noexcept // |
63 | 7.13k | : source_{ sv } |
64 | 7.13k | { |
65 | | // skip the byte order mark when the view begins with one |
66 | 7.13k | if (source_.length() >= 3u && memcmp(utf8_byte_order_mark.data(), source_.data(), 3u) == 0) |
67 | 22 | position_ += 3u; |
68 | 7.13k | } |
69 | | |
// Always false: this stream type has no I/O error state to report.
70 | | TOML_CONST_INLINE_GETTER |
71 | | constexpr bool error() const noexcept |
72 | 1.44M | { |
73 | 1.44M | return false; |
74 | 1.44M | } |
75 | | |
// True once the read offset has reached the end of the view.
76 | | TOML_PURE_INLINE_GETTER |
77 | | constexpr bool eof() const noexcept |
78 | 2.90M | { |
79 | 2.90M | return position_ >= source_.length(); |
80 | 2.90M | } |
81 | | |
// True while unread bytes remain (the negation of eof()).
82 | | TOML_PURE_INLINE_GETTER |
83 | | explicit constexpr operator bool() const noexcept |
84 | 1.44M | { |
85 | 1.44M | return !eof(); |
86 | 1.44M | } |
87 | | |
// Same answer as eof(); no read is needed to find out.
88 | | TOML_PURE_INLINE_GETTER |
89 | | constexpr bool peek_eof() const noexcept |
90 | 7.13k | { |
91 | 7.13k | return eof(); |
92 | 7.13k | } |
93 | | |
// Copies up to num bytes from the current offset into dest, advances the offset, and returns
// the number of bytes copied. The request is clamped to what is left in the view.
// Must not be called once eof() is true (asserted).
94 | | TOML_NODISCARD |
95 | | TOML_ATTR(nonnull) |
96 | | size_t operator()(void* dest, size_t num) noexcept |
97 | 1.44M | { |
98 | 1.44M | TOML_ASSERT_ASSUME(!eof()); |
99 | | |
100 | 1.44M | num = impl::min(position_ + num, source_.length()) - position_; |
101 | 1.44M | std::memcpy(dest, source_.data() + position_, num); |
102 | 1.44M | position_ += num; |
103 | 1.44M | return num; |
104 | 1.44M | } |
105 | | }; |
106 | | |
// Byte-stream adapter over a std::istream. It stores a pointer to the stream passed to the
// constructor and answers every query from that stream's state flags.
107 | | template <> |
108 | | class utf8_byte_stream<std::istream> |
109 | | { |
110 | | private: |
111 | | std::istream* source_; |
112 | | |
113 | | public: |
// Wraps stream and consumes a leading byte order mark if there is one.
// If the stream is already at eof or bad, nothing is read. Otherwise the current position is
// recorded and three bytes are read: when the stream went bad, or the three bytes are the BOM,
// the constructor returns with them consumed. In every other case the state flags are cleared
// and the stream is moved back to the recorded position.
114 | | TOML_NODISCARD_CTOR |
115 | | explicit utf8_byte_stream(std::istream& stream) noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) // |
116 | | : source_{ &stream } |
117 | 0 | { |
118 | 0 | if (!*this) // eof, bad |
119 | 0 | return; |
120 | 0 |
|
121 | 0 | const auto initial_pos = source_->tellg(); |
122 | 0 | char bom[3]; |
123 | 0 | source_->read(bom, 3); |
124 | 0 | if (source_->bad() || (source_->gcount() == 3 && memcmp(utf8_byte_order_mark.data(), bom, 3u) == 0)) |
125 | 0 | return; |
126 | 0 |
|
127 | 0 | source_->clear(); |
128 | 0 | source_->seekg(initial_pos, std::istream::beg); |
129 | 0 | } |
130 | | |
// True when the stream's badbit is set.
131 | | TOML_PURE_INLINE_GETTER |
132 | | bool error() const noexcept |
133 | 0 | { |
134 | 0 | return !!(source_->rdstate() & std::istream::badbit); |
135 | 0 | } |
136 | | |
// True when the stream's eofbit is set.
137 | | TOML_PURE_INLINE_GETTER |
138 | | bool eof() const noexcept |
139 | 0 | { |
140 | 0 | return !!(source_->rdstate() & std::istream::eofbit); |
141 | 0 | } |
142 | | |
// True while neither badbit nor eofbit is set. failbit is not consulted.
143 | | TOML_PURE_INLINE_GETTER |
144 | | explicit operator bool() const noexcept |
145 | 0 | { |
146 | 0 | return !(source_->rdstate() & (std::istream::badbit | std::istream::eofbit)); |
147 | 0 | } |
148 | | |
// True when eofbit is already set or peek() reports the end-of-file value.
149 | | TOML_NODISCARD |
150 | | bool peek_eof() const noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) |
151 | 0 | { |
152 | 0 | return eof() || source_->peek() == std::istream::traits_type::eof(); |
153 | 0 | } |
154 | | |
// Reads up to num bytes into dest and returns the stream's gcount() for that read.
// The stream must be neither at eof nor bad on entry (asserted).
155 | | TOML_NODISCARD |
156 | | TOML_ATTR(nonnull) |
157 | | size_t operator()(void* dest, size_t num) noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) |
158 | 0 | { |
159 | 0 | TOML_ASSERT(*this); |
160 | 0 |
|
161 | 0 | source_->read(static_cast<char*>(dest), static_cast<std::streamsize>(num)); |
162 | 0 | return static_cast<size_t>(source_->gcount()); |
163 | 0 | } |
164 | | }; |
165 | | |
// One decoded code point together with where it came from.
//   value    - the decoded char32_t
//   bytes    - the encoded bytes it was decoded from (count of them are meaningful)
//   count    - number of encoded bytes stored in bytes
//   position - source position assigned by utf8_reader::read_next_block
166 | | struct utf8_codepoint |
167 | | { |
168 | | char32_t value; |
169 | | char bytes[4]; |
170 | | size_t count; |
171 | | source_position position; |
172 | | |
// Implicit conversion to the decoded value, so a utf8_codepoint compares directly
// against char32_t literals.
173 | | TOML_PURE_INLINE_GETTER |
174 | | constexpr operator const char32_t&() const noexcept |
175 | 161M | { |
176 | 161M | return value; |
177 | 161M | } |
178 | | |
// Dereference also yields the decoded value.
179 | | TOML_PURE_INLINE_GETTER |
180 | | constexpr const char32_t& operator*() const noexcept |
181 | 1.16M | { |
182 | 1.16M | return value; |
183 | 1.16M | } |
184 | | }; |
// The type must stay trivially default-constructible, trivially copyable and standard-layout.
// NOTE(review): presumably because utf8_reader::read_next_block clears its buffer of these
// with std::memset - confirm before adding non-trivial members.
185 | | static_assert(std::is_trivially_default_constructible_v<utf8_codepoint> && std::is_trivially_copyable_v<utf8_codepoint>); |
186 | | static_assert(std::is_standard_layout_v<utf8_codepoint>); |
187 | | |
// Abstract interface of a code point reader. utf8_reader<T> implements it, and
// utf8_buffered_reader consumes it through a reference.
188 | | struct TOML_ABSTRACT_INTERFACE utf8_reader_interface |
189 | | { |
// The source path associated with the input.
190 | | TOML_NODISCARD |
191 | | virtual const source_path_ptr& source_path() const noexcept = 0; |
192 | | |
// Pointer to the next code point, or a null pointer when none can be produced.
193 | | TOML_NODISCARD |
194 | | virtual const utf8_codepoint* read_next() noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) = 0; |
195 | | |
// Whether the underlying input reports end-of-file.
196 | | TOML_NODISCARD |
197 | | virtual bool peek_eof() const noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) = 0; |
198 | | |
// Only present when exceptions are disabled: access to the stored parse error, if any.
199 | | #if !TOML_EXCEPTIONS |
200 | | |
201 | | TOML_NODISCARD |
202 | | virtual optional<parse_error>&& error() noexcept = 0; |
203 | | |
204 | | #endif |
205 | | |
206 | 7.13k | virtual ~utf8_reader_interface() noexcept = default; |
207 | | }; |
208 | | |
// Error-reporting helpers used inside utf8_reader.
// With exceptions: utf8_reader_error throws a parse_error and the other two macros expand to
// a no-op static_assert(true).
// Without exceptions: utf8_reader_error stores the error in err_, utf8_reader_return_after_error
// returns its argument, and utf8_reader_error_check returns its argument when err_ is set.
209 | | #if TOML_EXCEPTIONS |
210 | 109 | #define utf8_reader_error(...) throw parse_error(__VA_ARGS__) |
211 | 0 | #define utf8_reader_return_after_error(...) static_assert(true) |
212 | 46.0M | #define utf8_reader_error_check(...) static_assert(true) |
213 | | #else |
214 | | #define utf8_reader_error(...) err_.emplace(__VA_ARGS__) |
215 | | #define utf8_reader_return_after_error(...) return __VA_ARGS__ |
216 | | #define utf8_reader_error_check(...) \ |
217 | | do \ |
218 | | { \ |
219 | | if TOML_UNLIKELY(err_) \ |
220 | | return __VA_ARGS__; \ |
221 | | } \ |
222 | | while (false) |
223 | | |
224 | | #endif |
225 | | |
// Alignment specifier applied to the reader's block buffers: alignas(32) everywhere except
// Apple and MinGW targets, where it expands to nothing.
226 | | #if defined(__APPLE__) || defined(__MINGW32__) || defined(__MINGW64__) |
227 | | #define TOML_OVERALIGNED |
228 | | #else |
229 | 1.44M | #define TOML_OVERALIGNED alignas(32) |
230 | | #endif |
231 | | |
232 | | template <typename T> |
233 | | class TOML_EMPTY_BASES utf8_reader final : public utf8_reader_interface |
234 | | { |
235 | | private: |
236 | | static constexpr size_t block_capacity = 32; |
237 | | utf8_byte_stream<T> stream_; |
238 | | source_position next_pos_ = { 1, 1 }; |
239 | | |
240 | | impl::utf8_decoder decoder_; |
// Encoded bytes of the code point that is currently being decoded; count is how many have been
// collected so far. read_next_block resets count when a code point completes or when it takes
// the ASCII fast path, so a partial sequence carries over from one block to the next.
241 | | struct currently_decoding_t |
242 | | { |
243 | | char bytes[4]; |
244 | | size_t count; |
245 | | } currently_decoding_; |
246 | | |
// Decoded code points of the most recently read block. count entries of buffer are valid and
// current is the index of the entry read_next() returns next.
247 | | struct codepoints_t |
248 | | { |
249 | | TOML_OVERALIGNED utf8_codepoint buffer[block_capacity]; |
250 | | size_t current; |
251 | | size_t count; |
252 | | } codepoints_; |
253 | | |
254 | | source_path_ptr source_path_; |
255 | | |
256 | | #if !TOML_EXCEPTIONS |
257 | | optional<parse_error> err_; |
258 | | #endif |
259 | | |
// Reads up to block_capacity raw bytes from stream_ and decodes them into codepoints_.
// Returns true after the buffer has been refilled and every code point has been given a
// source position. Returns false on a zero-byte read; that is an error unless the stream is at
// EOF with no partially decoded sequence pending. Invalid, overlong or truncated sequences and
// stream I/O errors are raised through utf8_reader_error: a thrown parse_error when exceptions
// are enabled, otherwise a stored err_ followed by a false return.
260 | | bool read_next_block() noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) |
261 | 1.44M | { |
262 | 1.44M | TOML_ASSERT(stream_); |
263 | | |
264 | 1.44M | TOML_OVERALIGNED char raw_bytes[block_capacity]; |
265 | 1.44M | size_t raw_bytes_read; |
266 | | |
267 | | // read the next raw (encoded) block in from the stream |
268 | | if constexpr (noexcept(stream_(raw_bytes, block_capacity)) || !TOML_EXCEPTIONS) |
269 | 1.44M | { |
270 | 1.44M | raw_bytes_read = stream_(raw_bytes, block_capacity); |
271 | | } |
272 | | #if TOML_EXCEPTIONS |
273 | | else |
274 | | { |
275 | | try |
276 | | { |
277 | | raw_bytes_read = stream_(raw_bytes, block_capacity); |
278 | | } |
279 | | catch (const std::exception& exc) |
280 | | { |
281 | | throw parse_error{ exc.what(), next_pos_, source_path_ }; |
282 | | } |
283 | | catch (...) |
284 | | { |
285 | | throw parse_error{ "An unspecified error occurred", next_pos_, source_path_ }; |
286 | | } |
287 | | } |
288 | 1.44M | #endif // TOML_EXCEPTIONS |
289 | | |
290 | | // handle a zero-byte read |
291 | 1.44M | if TOML_UNLIKELY(!raw_bytes_read) |
292 | 0 | { |
293 | 0 | if (stream_.eof()) |
294 | 0 | { |
295 | | // EOF only sets the error state if the decoder wants more input, otherwise |
296 | | // a zero-byte read might have just caused the underlying stream to realize it's exhausted and set |
297 | | // the EOF flag, and that's totally fine |
298 | 0 | if (decoder_.needs_more_input()) |
299 | 0 | utf8_reader_error("Encountered EOF during incomplete utf-8 code point sequence", |
300 | 0 | next_pos_, |
301 | 0 | source_path_); |
302 | 0 | } |
303 | 0 | else |
304 | 0 | { |
305 | 0 | utf8_reader_error("Reading from the underlying stream failed - zero bytes read", |
306 | 0 | next_pos_, |
307 | 0 | source_path_); |
308 | 0 | } |
309 | 0 | return false; |
310 | 0 | } |
311 | | |
312 | 1.44M | TOML_ASSERT_ASSUME(raw_bytes_read); |
// zeroes the whole codepoints_ struct: the buffer, current and count
313 | 1.44M | std::memset(&codepoints_, 0, sizeof(codepoints_)); |
314 | | |
315 | | // helper for calculating decoded codepoint line+cols |
316 | 1.44M | const auto calc_positions = [&]() noexcept |
317 | 1.44M | { |
318 | 47.4M | for (size_t i = 0; i < codepoints_.count; i++) |
319 | 46.0M | { |
320 | 46.0M | auto& cp = codepoints_.buffer[i]; |
321 | 46.0M | cp.position = next_pos_; |
322 | | |
323 | 46.0M | if (cp == U'\n') |
324 | 2.14M | { |
325 | 2.14M | next_pos_.line++; |
326 | 2.14M | next_pos_.column = source_index{ 1 }; |
327 | 2.14M | } |
328 | 43.8M | else |
329 | 43.8M | next_pos_.column++; |
330 | 46.0M | } |
331 | 1.44M | }; |
332 | | |
333 | | // decide whether we need to use the UTF-8 decoder or if we can treat this block as plain ASCII |
334 | 1.44M | const auto ascii_fast_path = !decoder_.needs_more_input() && impl::is_ascii(raw_bytes, raw_bytes_read); |
335 | | |
336 | | // ASCII fast-path |
337 | 1.44M | if (ascii_fast_path) |
338 | 1.43M | { |
339 | 1.43M | decoder_.reset(); |
340 | 1.43M | currently_decoding_.count = {}; |
341 | | |
342 | 1.43M | codepoints_.count = raw_bytes_read; |
343 | 47.1M | for (size_t i = 0; i < codepoints_.count; i++) |
344 | 45.7M | { |
345 | 45.7M | auto& cp = codepoints_.buffer[i]; |
346 | 45.7M | cp.value = static_cast<char32_t>(raw_bytes[i]); |
347 | 45.7M | cp.bytes[0] = raw_bytes[i]; |
348 | 45.7M | cp.count = 1u; |
349 | 45.7M | } |
350 | 1.43M | } |
351 | | |
352 | | // UTF-8 slow-path |
353 | 10.7k | else |
354 | 10.7k | { |
355 | | // helper for getting precise error location |
356 | 10.7k | const auto error_pos = [&]() noexcept -> const source_position& |
357 | 10.7k | { // |
358 | 109 | return codepoints_.count ? codepoints_.buffer[codepoints_.count - 1u].position : next_pos_; |
359 | 109 | }; |
360 | | |
361 | 315k | for (size_t i = 0; i < raw_bytes_read; i++) |
362 | 304k | { |
363 | 304k | decoder_(static_cast<uint8_t>(raw_bytes[i])); |
364 | 304k | if TOML_UNLIKELY(decoder_.error()) |
365 | 78 | { |
366 | 78 | calc_positions(); |
367 | 78 | utf8_reader_error("Encountered invalid utf-8 sequence", error_pos(), source_path_); |
368 | 0 | utf8_reader_return_after_error(false); |
369 | 0 | } |
370 | | |
371 | 304k | currently_decoding_.bytes[currently_decoding_.count++] = raw_bytes[i]; |
372 | | |
373 | 304k | if (decoder_.has_code_point()) |
374 | 271k | { |
375 | 271k | auto& cp = codepoints_.buffer[codepoints_.count++]; |
376 | | |
377 | 271k | cp.value = decoder_.codepoint; |
378 | 271k | cp.count = currently_decoding_.count; |
379 | 271k | std::memcpy(cp.bytes, currently_decoding_.bytes, currently_decoding_.count); |
380 | 271k | currently_decoding_.count = {}; |
381 | 271k | } |
382 | 32.6k | else if TOML_UNLIKELY(currently_decoding_.count == 4u) |
383 | 0 | { |
384 | 0 | calc_positions(); |
385 | 0 | utf8_reader_error("Encountered overlong utf-8 sequence", error_pos(), source_path_); |
386 | 0 | utf8_reader_return_after_error(false); |
387 | 0 | } |
388 | 304k | } |
389 | 10.6k | if TOML_UNLIKELY(decoder_.needs_more_input() && stream_.eof()) |
390 | 31 | { |
391 | 31 | calc_positions(); |
392 | 31 | utf8_reader_error("Encountered EOF during incomplete utf-8 code point sequence", |
393 | 31 | error_pos(), |
394 | 31 | source_path_); |
395 | 0 | utf8_reader_return_after_error(false); |
396 | 0 | } |
397 | 10.6k | } |
398 | | |
399 | 1.44M | TOML_ASSERT_ASSUME(codepoints_.count); |
400 | 1.44M | calc_positions(); |
401 | | |
402 | | // handle general I/O errors |
403 | | // (down here so the next_pos_ benefits from calc_positions()) |
404 | 1.44M | if TOML_UNLIKELY(stream_.error()) |
405 | 0 | { |
406 | 0 | utf8_reader_error("An I/O error occurred while reading from the underlying stream", |
407 | 0 | next_pos_, |
408 | 0 | source_path_); |
409 | 0 | utf8_reader_return_after_error(false); |
410 | 0 | } |
411 | | |
412 | 1.44M | return true; |
413 | 1.44M | } toml::v3::impl::utf8_reader<std::__1::basic_string_view<char, std::__1::char_traits<char> > >::read_next_block() Line | Count | Source | 261 | 1.44M | { | 262 | 1.44M | TOML_ASSERT(stream_); | 263 | | | 264 | 1.44M | TOML_OVERALIGNED char raw_bytes[block_capacity]; | 265 | 1.44M | size_t raw_bytes_read; | 266 | | | 267 | | // read the next raw (encoded) block in from the stream | 268 | | if constexpr (noexcept(stream_(raw_bytes, block_capacity)) || !TOML_EXCEPTIONS) | 269 | 1.44M | { | 270 | 1.44M | raw_bytes_read = stream_(raw_bytes, block_capacity); | 271 | | } | 272 | | #if TOML_EXCEPTIONS | 273 | | else | 274 | | { | 275 | | try | 276 | | { | 277 | | raw_bytes_read = stream_(raw_bytes, block_capacity); | 278 | | } | 279 | | catch (const std::exception& exc) | 280 | | { | 281 | | throw parse_error{ exc.what(), next_pos_, source_path_ }; | 282 | | } | 283 | | catch (...) | 284 | | { | 285 | | throw parse_error{ "An unspecified error occurred", next_pos_, source_path_ }; | 286 | | } | 287 | | } | 288 | 1.44M | #endif // TOML_EXCEPTIONS | 289 | | | 290 | | // handle a zero-byte read | 291 | 1.44M | if TOML_UNLIKELY(!raw_bytes_read) | 292 | 0 | { | 293 | 0 | if (stream_.eof()) | 294 | 0 | { | 295 | | // EOF only sets the error state if the decoder wants more input, otherwise | 296 | | // a zero-byte read might have just caused the underlying stream to realize it's exhaused and set | 297 | | // the EOF flag, and that's totally fine | 298 | 0 | if (decoder_.needs_more_input()) | 299 | 0 | utf8_reader_error("Encountered EOF during incomplete utf-8 code point sequence", | 300 | 0 | next_pos_, | 301 | 0 | source_path_); | 302 | 0 | } | 303 | 0 | else | 304 | 0 | { | 305 | 0 | utf8_reader_error("Reading from the underlying stream failed - zero bytes read", | 306 | 0 | next_pos_, | 307 | 0 | source_path_); | 308 | 0 | } | 309 | 0 | return false; | 310 | 0 | } | 311 | | | 312 | 1.44M | TOML_ASSERT_ASSUME(raw_bytes_read); | 313 | 1.44M | 
std::memset(&codepoints_, 0, sizeof(codepoints_)); | 314 | | | 315 | | // helper for calculating decoded codepoint line+cols | 316 | 1.44M | const auto calc_positions = [&]() noexcept | 317 | 1.44M | { | 318 | 1.44M | for (size_t i = 0; i < codepoints_.count; i++) | 319 | 1.44M | { | 320 | 1.44M | auto& cp = codepoints_.buffer[i]; | 321 | 1.44M | cp.position = next_pos_; | 322 | | | 323 | 1.44M | if (cp == U'\n') | 324 | 1.44M | { | 325 | 1.44M | next_pos_.line++; | 326 | 1.44M | next_pos_.column = source_index{ 1 }; | 327 | 1.44M | } | 328 | 1.44M | else | 329 | 1.44M | next_pos_.column++; | 330 | 1.44M | } | 331 | 1.44M | }; | 332 | | | 333 | | // decide whether we need to use the UTF-8 decoder or if we can treat this block as plain ASCII | 334 | 1.44M | const auto ascii_fast_path = !decoder_.needs_more_input() && impl::is_ascii(raw_bytes, raw_bytes_read); | 335 | | | 336 | | // ASCII fast-path | 337 | 1.44M | if (ascii_fast_path) | 338 | 1.43M | { | 339 | 1.43M | decoder_.reset(); | 340 | 1.43M | currently_decoding_.count = {}; | 341 | | | 342 | 1.43M | codepoints_.count = raw_bytes_read; | 343 | 47.1M | for (size_t i = 0; i < codepoints_.count; i++) | 344 | 45.7M | { | 345 | 45.7M | auto& cp = codepoints_.buffer[i]; | 346 | 45.7M | cp.value = static_cast<char32_t>(raw_bytes[i]); | 347 | 45.7M | cp.bytes[0] = raw_bytes[i]; | 348 | 45.7M | cp.count = 1u; | 349 | 45.7M | } | 350 | 1.43M | } | 351 | | | 352 | | // UTF-8 slow-path | 353 | 10.7k | else | 354 | 10.7k | { | 355 | | // helper for getting precise error location | 356 | 10.7k | const auto error_pos = [&]() noexcept -> const source_position& | 357 | 10.7k | { // | 358 | 10.7k | return codepoints_.count ? 
codepoints_.buffer[codepoints_.count - 1u].position : next_pos_; | 359 | 10.7k | }; | 360 | | | 361 | 315k | for (size_t i = 0; i < raw_bytes_read; i++) | 362 | 304k | { | 363 | 304k | decoder_(static_cast<uint8_t>(raw_bytes[i])); | 364 | 304k | if TOML_UNLIKELY(decoder_.error()) | 365 | 78 | { | 366 | 78 | calc_positions(); | 367 | 78 | utf8_reader_error("Encountered invalid utf-8 sequence", error_pos(), source_path_); | 368 | 0 | utf8_reader_return_after_error(false); | 369 | 0 | } | 370 | | | 371 | 304k | currently_decoding_.bytes[currently_decoding_.count++] = raw_bytes[i]; | 372 | | | 373 | 304k | if (decoder_.has_code_point()) | 374 | 271k | { | 375 | 271k | auto& cp = codepoints_.buffer[codepoints_.count++]; | 376 | | | 377 | 271k | cp.value = decoder_.codepoint; | 378 | 271k | cp.count = currently_decoding_.count; | 379 | 271k | std::memcpy(cp.bytes, currently_decoding_.bytes, currently_decoding_.count); | 380 | 271k | currently_decoding_.count = {}; | 381 | 271k | } | 382 | 32.6k | else if TOML_UNLIKELY(currently_decoding_.count == 4u) | 383 | 0 | { | 384 | 0 | calc_positions(); | 385 | 0 | utf8_reader_error("Encountered overlong utf-8 sequence", error_pos(), source_path_); | 386 | 0 | utf8_reader_return_after_error(false); | 387 | 0 | } | 388 | 304k | } | 389 | 10.6k | if TOML_UNLIKELY(decoder_.needs_more_input() && stream_.eof()) | 390 | 31 | { | 391 | 31 | calc_positions(); | 392 | 31 | utf8_reader_error("Encountered EOF during incomplete utf-8 code point sequence", | 393 | 31 | error_pos(), | 394 | 31 | source_path_); | 395 | 0 | utf8_reader_return_after_error(false); | 396 | 0 | } | 397 | 10.6k | } | 398 | | | 399 | 1.44M | TOML_ASSERT_ASSUME(codepoints_.count); | 400 | 1.44M | calc_positions(); | 401 | | | 402 | | // handle general I/O errors | 403 | | // (down here so the next_pos_ benefits from calc_positions()) | 404 | 1.44M | if TOML_UNLIKELY(stream_.error()) | 405 | 0 | { | 406 | 0 | utf8_reader_error("An I/O error occurred while reading from 
the underlying stream", | 407 | 0 | next_pos_, | 408 | 0 | source_path_); | 409 | 0 | utf8_reader_return_after_error(false); | 410 | 0 | } | 411 | | | 412 | 1.44M | return true; | 413 | 1.44M | } |
Unexecuted instantiation: toml::v3::impl::utf8_reader<std::__1::basic_istream<char, std::__1::char_traits<char> > >::read_next_block() |
414 | | |
415 | | public: |
// Builds the reader: forwards source to the byte stream, zeroes the decode and buffer counters,
// and, when source_path is non-empty, stores it in a shared std::string.
// The noexcept specification mirrors that of the byte stream's constructor.
416 | | template <typename U, typename String = std::string_view> |
417 | | TOML_NODISCARD_CTOR |
418 | | explicit utf8_reader(U&& source, String&& source_path = {}) noexcept( |
419 | | std::is_nothrow_constructible_v<utf8_byte_stream<T>, U&&>) |
420 | 7.13k | : stream_{ static_cast<U&&>(source) } |
421 | 7.13k | { |
422 | 7.13k | currently_decoding_.count = {}; |
423 | | |
424 | 7.13k | codepoints_.current = {}; |
425 | 7.13k | codepoints_.count = {}; |
426 | | |
427 | 7.13k | if (!source_path.empty()) |
428 | 0 | source_path_ = std::make_shared<const std::string>(static_cast<String&&>(source_path)); |
429 | 7.13k | } |
430 | | |
// Returns the stored source path pointer. It is left default-initialized when the constructor
// was given an empty path.
431 | | TOML_PURE_INLINE_GETTER |
432 | | const source_path_ptr& source_path() const noexcept final |
433 | 1.21M | { |
434 | 1.21M | return source_path_; |
435 | 1.21M | } toml::v3::impl::utf8_reader<std::__1::basic_string_view<char, std::__1::char_traits<char> > >::source_path() const Line | Count | Source | 433 | 1.21M | { | 434 | 1.21M | return source_path_; | 435 | 1.21M | } |
Unexecuted instantiation: toml::v3::impl::utf8_reader<std::__1::basic_istream<char, std::__1::char_traits<char> > >::source_path() const |
436 | | |
// Returns a pointer to the next decoded code point, refilling the block buffer through
// read_next_block() once every buffered entry has been handed out. Returns a null pointer when
// the stream is exhausted or bad, or when the refill fails. In builds without exceptions it
// also returns a null pointer straight away if an error is already stored.
// The pointer refers to an entry of codepoints_.buffer, which the next refill overwrites.
437 | | TOML_NODISCARD |
438 | | const utf8_codepoint* read_next() noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) final |
439 | 46.0M | { |
440 | 46.0M | utf8_reader_error_check({}); |
441 | | |
442 | 46.0M | if (codepoints_.current == codepoints_.count) |
443 | 1.44M | { |
444 | 1.44M | if TOML_UNLIKELY(!stream_ || !read_next_block()) |
445 | 5.57k | return nullptr; |
446 | | |
447 | 1.44M | TOML_ASSERT_ASSUME(!codepoints_.current); |
448 | 1.44M | } |
449 | 46.0M | TOML_ASSERT_ASSUME(codepoints_.count); |
450 | 46.0M | TOML_ASSERT_ASSUME(codepoints_.count <= block_capacity); |
451 | 46.0M | TOML_ASSERT_ASSUME(codepoints_.current < codepoints_.count); |
452 | | |
453 | 46.0M | return &codepoints_.buffer[codepoints_.current++]; |
454 | 46.0M | } toml::v3::impl::utf8_reader<std::__1::basic_string_view<char, std::__1::char_traits<char> > >::read_next() Line | Count | Source | 439 | 46.0M | { | 440 | 46.0M | utf8_reader_error_check({}); | 441 | | | 442 | 46.0M | if (codepoints_.current == codepoints_.count) | 443 | 1.44M | { | 444 | 1.44M | if TOML_UNLIKELY(!stream_ || !read_next_block()) | 445 | 5.57k | return nullptr; | 446 | | | 447 | 1.44M | TOML_ASSERT_ASSUME(!codepoints_.current); | 448 | 1.44M | } | 449 | 46.0M | TOML_ASSERT_ASSUME(codepoints_.count); | 450 | 46.0M | TOML_ASSERT_ASSUME(codepoints_.count <= block_capacity); | 451 | 46.0M | TOML_ASSERT_ASSUME(codepoints_.current < codepoints_.count); | 452 | | | 453 | 46.0M | return &codepoints_.buffer[codepoints_.current++]; | 454 | 46.0M | } |
Unexecuted instantiation: toml::v3::impl::utf8_reader<std::__1::basic_istream<char, std::__1::char_traits<char> > >::read_next() |
455 | | |
// Forwards to the byte stream's peek_eof(). Code points already decoded into codepoints_ are
// not taken into account.
456 | | TOML_NODISCARD |
457 | | bool peek_eof() const noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) final |
458 | 7.13k | { |
459 | 7.13k | return stream_.peek_eof(); |
460 | 7.13k | } toml::v3::impl::utf8_reader<std::__1::basic_string_view<char, std::__1::char_traits<char> > >::peek_eof() const Line | Count | Source | 458 | 7.13k | { | 459 | 7.13k | return stream_.peek_eof(); | 460 | 7.13k | } |
Unexecuted instantiation: toml::v3::impl::utf8_reader<std::__1::basic_istream<char, std::__1::char_traits<char> > >::peek_eof() const |
461 | | |
462 | | #if !TOML_EXCEPTIONS |
463 | | |
// Hands out the stored error as an rvalue reference to err_. std::move here is only a cast:
// err_ is not reset by this call, so it stays engaged unless the caller moves from it.
464 | | TOML_NODISCARD |
465 | | optional<parse_error>&& error() noexcept final |
466 | | { |
467 | | return std::move(err_); |
468 | | } |
469 | | |
470 | | #endif |
471 | | }; |
472 | | |
// Deduction guides: a string-view source selects utf8_reader<std::basic_string_view<Char>>,
// an istream reference selects utf8_reader<std::basic_istream<Char>>. Each has one guide for a
// std::string_view path and one for a std::string&& path.
473 | | template <typename Char> |
474 | | utf8_reader(std::basic_string_view<Char>, std::string_view) -> utf8_reader<std::basic_string_view<Char>>; |
475 | | template <typename Char> |
476 | | utf8_reader(std::basic_string_view<Char>, std::string&&) -> utf8_reader<std::basic_string_view<Char>>; |
477 | | template <typename Char> |
478 | | utf8_reader(std::basic_istream<Char>&, std::string_view) -> utf8_reader<std::basic_istream<Char>>; |
479 | | template <typename Char> |
480 | | utf8_reader(std::basic_istream<Char>&, std::string&&) -> utf8_reader<std::basic_istream<Char>>; |
481 | | |
// Early-out helper for utf8_buffered_reader. It is a no-op when exceptions are enabled;
// otherwise it returns its argument when the wrapped reader holds an error.
482 | | #if TOML_EXCEPTIONS |
483 | 48.0M | #define utf8_buffered_reader_error_check(...) static_assert(true) |
484 | | #else |
485 | | #define utf8_buffered_reader_error_check(...) \ |
486 | | do \ |
487 | | { \ |
488 | | if TOML_UNLIKELY(reader_.error()) \ |
489 | | return __VA_ARGS__; \ |
490 | | } \ |
491 | | while (false) |
492 | | |
493 | | #endif |
494 | | |
// Wraps a utf8_reader_interface and remembers recently read code points so the caller can step
// backwards and replay them. Earlier code points are copied into a ring buffer (history_); the
// most recent one stays in the wrapped reader and is referred to by head_.
495 | | class TOML_EMPTY_BASES utf8_buffered_reader |
496 | | { |
497 | | public: |
// Number of code points kept available: the current head plus the ring buffer.
498 | | static constexpr size_t max_history_length = 128; |
499 | | |
500 | | private: |
501 | | static constexpr size_t history_buffer_size = max_history_length - 1; //'head' is stored in the reader |
502 | | utf8_reader_interface& reader_; |
// Ring buffer of code points read before head_: count entries are in use and first is a running
// counter that is reduced modulo history_buffer_size to find the oldest entry.
503 | | struct |
504 | | { |
505 | | utf8_codepoint buffer[history_buffer_size]; |
506 | | size_t count, first; |
507 | | } history_ = {}; |
// head_ is the last pointer returned by reader_.read_next(); negative_offset_ is how many code
// points behind head_ the read cursor currently sits (zero means "at head").
508 | | const utf8_codepoint* head_ = {}; |
509 | | size_t negative_offset_ = {}; |
510 | | |
511 | | public: |
512 | | TOML_NODISCARD_CTOR |
513 | | explicit utf8_buffered_reader(utf8_reader_interface& reader) noexcept // |
514 | 7.13k | : reader_{ reader } |
515 | 7.13k | {} |
516 | | |
// Forwards to the wrapped reader.
517 | | TOML_PURE_INLINE_GETTER |
518 | | const source_path_ptr& source_path() const noexcept |
519 | 1.21M | { |
520 | 1.21M | return reader_.source_path(); |
521 | 1.21M | } |
522 | | |
// Advances the cursor by one code point and returns it.
// While stepped back, this replays from the history (or returns head_ once the offset reaches
// zero) without touching the wrapped reader. Otherwise the current head is copied into the
// history - overwriting the oldest entry when the ring is full - and a new head is fetched.
// Once head_ is null after the first read, null keeps being returned with no further reads.
523 | | TOML_NODISCARD |
524 | | const utf8_codepoint* read_next() noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) |
525 | 47.2M | { |
526 | 47.2M | utf8_buffered_reader_error_check({}); |
527 | | |
528 | 47.2M | if (negative_offset_) |
529 | 1.17M | { |
530 | 1.17M | negative_offset_--; |
531 | | |
532 | | // an entry negative offset of 1 just means "replay the current head" |
533 | 1.17M | if (!negative_offset_) |
534 | 842k | return head_; |
535 | | |
536 | | // otherwise step back into the history buffer |
537 | 336k | else |
538 | 336k | return history_.buffer |
539 | 336k | + ((history_.first + history_.count - negative_offset_) % history_buffer_size); |
540 | 1.17M | } |
541 | 46.0M | else |
542 | 46.0M | { |
543 | | // first character read from stream |
544 | 46.0M | if TOML_UNLIKELY(!history_.count && !head_) |
545 | 7.08k | head_ = reader_.read_next(); |
546 | | |
547 | | // subsequent characters and not eof |
548 | 46.0M | else if (head_) |
549 | 46.0M | { |
550 | 46.0M | if TOML_UNLIKELY(history_.count < history_buffer_size) |
551 | 279k | history_.buffer[history_.count++] = *head_; |
552 | 45.7M | else |
553 | 45.7M | history_.buffer[(history_.first++ + history_buffer_size) % history_buffer_size] = *head_; |
554 | | |
555 | 46.0M | head_ = reader_.read_next(); |
556 | 46.0M | } |
557 | | |
558 | 46.0M | return head_; |
559 | 46.0M | } |
560 | 47.2M | } |
561 | | |
// Moves the cursor count code points backwards and returns the code point it now sits on.
// The total offset must not exceed the number of entries held in the history (asserted).
562 | | TOML_NODISCARD |
563 | | const utf8_codepoint* step_back(size_t count) noexcept |
564 | 845k | { |
565 | 845k | utf8_buffered_reader_error_check({}); |
566 | | |
567 | 845k | TOML_ASSERT_ASSUME(history_.count); |
568 | 845k | TOML_ASSERT_ASSUME(negative_offset_ + count <= history_.count); |
569 | | |
570 | 845k | negative_offset_ += count; |
571 | | |
572 | 845k | return negative_offset_ |
573 | 845k | ? history_.buffer + ((history_.first + history_.count - negative_offset_) % history_buffer_size) |
574 | 845k | : head_; |
575 | 845k | } |
576 | | |
// Forwards to the wrapped reader; the cursor offset and history are not consulted.
577 | | TOML_NODISCARD |
578 | | bool peek_eof() const noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) |
579 | 7.13k | { |
580 | 7.13k | return reader_.peek_eof(); |
581 | 7.13k | } |
582 | | |
583 | | #if !TOML_EXCEPTIONS |
584 | | |
// Forwards the wrapped reader's stored error (builds without exceptions only).
585 | | TOML_NODISCARD |
586 | | optional<parse_error>&& error() noexcept |
587 | | { |
588 | | return reader_.error(); |
589 | | } |
590 | | |
591 | | #endif |
592 | | }; |
593 | | } |
594 | | TOML_ANON_NAMESPACE_END; |
595 | | |
596 | | //#--------------------------------------------------------------------------------------------------------------------- |
597 | | //# PARSER INTERNAL IMPLEMENTATION |
598 | | //#--------------------------------------------------------------------------------------------------------------------- |
599 | | |
// Attribute macro: expands to [[noreturn]] when exceptions are enabled and to nothing otherwise.
// NOTE(review): presumably applied to error-raising functions that throw in one configuration
// and return normally in the other - confirm at the use sites.
600 | | #if TOML_EXCEPTIONS |
601 | | #define TOML_RETURNS_BY_THROWING [[noreturn]] |
602 | | #else |
603 | | #define TOML_RETURNS_BY_THROWING |
604 | | #endif |
605 | | |
606 | | TOML_ANON_NAMESPACE_START |
607 | | { |
// Returns true when codepoint equals at least one of vals. Every argument in vals must be
// exactly char32_t (checked by the static_assert); an empty pack yields false.
608 | | template <typename... T> |
609 | | TOML_CONST_GETTER |
610 | | TOML_INTERNAL_LINKAGE |
611 | | constexpr bool is_match(char32_t codepoint, T... vals) noexcept |
612 | 4.34M | { |
613 | 4.34M | static_assert((std::is_same_v<char32_t, T> && ...)); |
614 | 8.73M | return ((codepoint == vals) || ...); |
615 | 4.34M | } bool toml::v3::impl::is_match<char32_t, char32_t>(char32_t, char32_t, char32_t) Line | Count | Source | 612 | 4.31M | { | 613 | 4.31M | static_assert((std::is_same_v<char32_t, T> && ...)); | 614 | 8.62M | return ((codepoint == vals) || ...); | 615 | 4.31M | } |
bool toml::v3::impl::is_match<char32_t, char32_t, char32_t>(char32_t, char32_t, char32_t, char32_t) Line | Count | Source | 612 | 9.54k | { | 613 | 9.54k | static_assert((std::is_same_v<char32_t, T> && ...)); | 614 | 26.0k | return ((codepoint == vals) || ...); | 615 | 9.54k | } |
bool toml::v3::impl::is_match<char32_t, char32_t, char32_t, char32_t, char32_t>(char32_t, char32_t, char32_t, char32_t, char32_t, char32_t) Line | Count | Source | 612 | 4.41k | { | 613 | 4.41k | static_assert((std::is_same_v<char32_t, T> && ...)); | 614 | 35.1k | return ((codepoint == vals) || ...); | 615 | 4.41k | } |
bool toml::v3::impl::is_match<char32_t, char32_t, char32_t, char32_t>(char32_t, char32_t, char32_t, char32_t, char32_t) Line | Count | Source | 612 | 9.90k | { | 613 | 9.90k | static_assert((std::is_same_v<char32_t, T> && ...)); | 614 | 44.0k | return ((codepoint == vals) || ...); | 615 | 9.90k | } |
bool toml::v3::impl::is_match<char32_t, char32_t, char32_t, char32_t, char32_t, char32_t>(char32_t, char32_t, char32_t, char32_t, char32_t, char32_t, char32_t) Line | Count | Source | 612 | 1.65k | { | 613 | 1.65k | static_assert((std::is_same_v<char32_t, T> && ...)); | 614 | 6.04k | return ((codepoint == vals) || ...); | 615 | 1.65k | } |
|
616 | | |
617 | | template <uint64_t> |
618 | | struct parse_integer_traits; |
619 | | template <> |
620 | | struct parse_integer_traits<2> |
621 | | { |
622 | | static constexpr auto scope_qualifier = "binary integer"sv; |
623 | | static constexpr auto is_digit = impl::is_binary_digit; |
624 | | static constexpr auto is_signed = false; |
625 | | static constexpr auto max_digits = 63; |
626 | | static constexpr auto prefix_codepoint = U'b'; |
627 | | static constexpr auto prefix = "b"sv; |
628 | | static constexpr auto full_prefix = "0b"sv; |
629 | | }; |
630 | | template <> |
631 | | struct parse_integer_traits<8> |
632 | | { |
633 | | static constexpr auto scope_qualifier = "octal integer"sv; |
634 | | static constexpr auto is_digit = impl::is_octal_digit; |
635 | | static constexpr auto is_signed = false; |
636 | | static constexpr auto max_digits = 21; // strlen("777777777777777777777") |
637 | | static constexpr auto prefix_codepoint = U'o'; |
638 | | static constexpr auto prefix = "o"sv; |
639 | | static constexpr auto full_prefix = "0o"sv; |
640 | | }; |
641 | | template <> |
642 | | struct parse_integer_traits<10> |
643 | | { |
644 | | static constexpr auto scope_qualifier = "decimal integer"sv; |
645 | | static constexpr auto is_digit = impl::is_decimal_digit; |
646 | | static constexpr auto is_signed = true; |
647 | | static constexpr auto max_digits = 19; // strlen("9223372036854775807") |
648 | | static constexpr auto full_prefix = ""sv; |
649 | | }; |
650 | | template <> |
651 | | struct parse_integer_traits<16> |
652 | | { |
653 | | static constexpr auto scope_qualifier = "hexadecimal integer"sv; |
654 | | static constexpr auto is_digit = impl::is_hexadecimal_digit; |
655 | | static constexpr auto is_signed = false; |
656 | | static constexpr auto max_digits = 16; // strlen("7FFFFFFFFFFFFFFF") |
657 | | static constexpr auto prefix_codepoint = U'x'; |
658 | | static constexpr auto prefix = "x"sv; |
659 | | static constexpr auto full_prefix = "0x"sv; |
660 | | }; |
661 | | |
662 | | TOML_PURE_GETTER |
663 | | TOML_INTERNAL_LINKAGE |
664 | | std::string_view to_sv(node_type val) noexcept |
665 | 52 | { |
666 | 52 | return impl::node_type_friendly_names[impl::unwrap_enum(val)]; |
667 | 52 | } |
668 | | |
669 | | TOML_PURE_GETTER |
670 | | TOML_INTERNAL_LINKAGE |
671 | | std::string_view to_sv(const std::string& str) noexcept |
672 | 113 | { |
673 | 113 | return std::string_view{ str }; |
674 | 113 | } |
675 | | |
676 | | TOML_CONST_GETTER |
677 | | TOML_INTERNAL_LINKAGE |
678 | | std::string_view to_sv(bool val) noexcept |
679 | 23 | { |
680 | 23 | using namespace std::string_view_literals; |
681 | | |
682 | 23 | return val ? "true"sv : "false"sv; |
683 | 23 | } |
684 | | |
685 | | TOML_PURE_GETTER |
686 | | TOML_INTERNAL_LINKAGE |
687 | | std::string_view to_sv(const utf8_codepoint& cp) noexcept |
688 | 1.63k | { |
689 | 1.63k | if (cp.value <= U'\x1F') |
690 | 203 | return impl::control_char_escapes[cp.value]; |
691 | 1.42k | else if (cp.value == U'\x7F') |
692 | 43 | return "\\u007F"sv; |
693 | 1.38k | else |
694 | 1.38k | return std::string_view{ cp.bytes, cp.count }; |
695 | 1.63k | } |
696 | | |
697 | | TOML_PURE_GETTER |
698 | | TOML_INTERNAL_LINKAGE |
699 | | std::string_view to_sv(const utf8_codepoint* cp) noexcept |
700 | 503 | { |
701 | 503 | if (cp) |
702 | 503 | return to_sv(*cp); |
703 | 0 | return ""sv; |
704 | 503 | } |
705 | | |
706 | | struct escaped_codepoint |
707 | | { |
708 | | const utf8_codepoint& cp; |
709 | | }; |
710 | | |
711 | | template <typename T> |
712 | | TOML_ATTR(nonnull) |
713 | | TOML_INTERNAL_LINKAGE |
714 | | void concatenate(char*& write_pos, char* const buf_end, const T& arg) noexcept |
715 | 18.7k | { |
716 | 18.7k | if TOML_UNLIKELY(write_pos >= buf_end) |
717 | 4 | return; |
718 | | |
719 | 18.7k | using arg_type = impl::remove_cvref<T>; |
720 | | |
721 | | // string views |
722 | | if constexpr (std::is_same_v<arg_type, std::string_view>) |
723 | 18.4k | { |
724 | 18.4k | const auto max_chars = static_cast<size_t>(buf_end - write_pos); |
725 | 18.4k | const auto len = max_chars < arg.length() ? max_chars : arg.length(); |
726 | 18.4k | std::memcpy(write_pos, arg.data(), len); |
727 | 18.4k | write_pos += len; |
728 | | } |
729 | | |
730 | | // doubles |
731 | | else if constexpr (std::is_same_v<arg_type, double>) |
732 | | { |
733 | | #if TOML_FLOAT_CHARCONV |
734 | | const auto result = std::to_chars(write_pos, buf_end, arg); |
735 | | write_pos = result.ptr; |
736 | | #else |
737 | | std::ostringstream ss; |
738 | | ss.imbue(std::locale::classic()); |
739 | | ss.precision(std::numeric_limits<arg_type>::max_digits10); |
740 | | ss << arg; |
741 | | concatenate(write_pos, buf_end, to_sv(std::move(ss).str())); |
742 | | #endif |
743 | | } |
744 | | |
745 | | // 64-bit integers |
746 | | else if constexpr (impl::is_one_of<arg_type, int64_t, uint64_t>) |
747 | 93 | { |
748 | 93 | #if TOML_INT_CHARCONV |
749 | 93 | const auto result = std::to_chars(write_pos, buf_end, arg); |
750 | 93 | write_pos = result.ptr; |
751 | | #else |
752 | | std::ostringstream ss; |
753 | | ss.imbue(std::locale::classic()); |
754 | | using cast_type = std::conditional_t<std::is_signed_v<arg_type>, int64_t, uint64_t>; |
755 | | ss << static_cast<cast_type>(arg); |
756 | | concatenate(write_pos, buf_end, to_sv(std::move(ss).str())); |
757 | | #endif |
758 | | } |
759 | | |
760 | | // escaped_codepoint |
761 | | else if constexpr (std::is_same_v<arg_type, escaped_codepoint>) |
762 | 96 | { |
763 | 96 | if (arg.cp.value <= U'\x7F') |
764 | 21 | concatenate(write_pos, buf_end, to_sv(arg.cp)); |
765 | 75 | else |
766 | 75 | { |
767 | 75 | auto val = static_cast<uint_least32_t>(arg.cp.value); |
768 | 75 | const auto digits = val > 0xFFFFu ? 8u : 4u; |
769 | 75 | constexpr auto mask = uint_least32_t{ 0xFu }; |
770 | 75 | char buf[10] = { '\\', digits > 4 ? 'U' : 'u' }; |
771 | 411 | for (auto i = 2u + digits; i-- > 2u;) |
772 | 336 | { |
773 | 336 | const auto hexdig = val & mask; |
774 | 336 | buf[i] = static_cast<char>(hexdig >= 0xAu ? ('A' + (hexdig - 0xAu)) : ('0' + hexdig)); |
775 | 336 | val >>= 4; |
776 | 336 | } |
777 | 75 | concatenate(write_pos, buf_end, std::string_view{ buf, digits + 2u }); |
778 | 75 | } |
779 | | } |
780 | | |
781 | | // all other floats (fallback - coerce to double) |
782 | | else if constexpr (std::is_floating_point_v<arg_type>) |
783 | | concatenate(write_pos, buf_end, static_cast<double>(arg)); |
784 | | |
785 | | // all other integers (fallback - coerce to (u)int64_t) |
786 | | else if constexpr (std::is_arithmetic_v<arg_type> && std::is_integral_v<arg_type>) |
787 | 50 | { |
788 | 50 | using cast_type = std::conditional_t<std::is_unsigned_v<arg_type>, uint64_t, int64_t>; |
789 | 50 | concatenate(write_pos, buf_end, static_cast<cast_type>(arg)); |
790 | | } |
791 | | |
792 | | else |
793 | | { |
794 | | static_assert( |
795 | | impl::always_false<T>, |
796 | | "concatenate() inputs are limited to std::string_views, integers, floats, and escaped_codepoint"); |
797 | | } |
798 | 18.7k | } void toml::v3::impl::concatenate<std::__1::basic_string_view<char, std::__1::char_traits<char> > >(char*&, char*, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) Line | Count | Source | 715 | 18.4k | { | 716 | 18.4k | if TOML_UNLIKELY(write_pos >= buf_end) | 717 | 4 | return; | 718 | | | 719 | 18.4k | using arg_type = impl::remove_cvref<T>; | 720 | | | 721 | | // string views | 722 | | if constexpr (std::is_same_v<arg_type, std::string_view>) | 723 | 18.4k | { | 724 | 18.4k | const auto max_chars = static_cast<size_t>(buf_end - write_pos); | 725 | 18.4k | const auto len = max_chars < arg.length() ? max_chars : arg.length(); | 726 | 18.4k | std::memcpy(write_pos, arg.data(), len); | 727 | 18.4k | write_pos += len; | 728 | | } | 729 | | | 730 | | // doubles | 731 | | else if constexpr (std::is_same_v<arg_type, double>) | 732 | | { | 733 | | #if TOML_FLOAT_CHARCONV | 734 | | const auto result = std::to_chars(write_pos, buf_end, arg); | 735 | | write_pos = result.ptr; | 736 | | #else | 737 | | std::ostringstream ss; | 738 | | ss.imbue(std::locale::classic()); | 739 | | ss.precision(std::numeric_limits<arg_type>::max_digits10); | 740 | | ss << arg; | 741 | | concatenate(write_pos, buf_end, to_sv(std::move(ss).str())); | 742 | | #endif | 743 | | } | 744 | | | 745 | | // 64-bit integers | 746 | | else if constexpr (impl::is_one_of<arg_type, int64_t, uint64_t>) | 747 | | { | 748 | | #if TOML_INT_CHARCONV | 749 | | const auto result = std::to_chars(write_pos, buf_end, arg); | 750 | | write_pos = result.ptr; | 751 | | #else | 752 | | std::ostringstream ss; | 753 | | ss.imbue(std::locale::classic()); | 754 | | using cast_type = std::conditional_t<std::is_signed_v<arg_type>, int64_t, uint64_t>; | 755 | | ss << static_cast<cast_type>(arg); | 756 | | concatenate(write_pos, buf_end, to_sv(std::move(ss).str())); | 757 | | #endif | 758 | | } | 759 | | | 760 | | // escaped_codepoint | 761 | | else if constexpr (std::is_same_v<arg_type, 
escaped_codepoint>) | 762 | | { | 763 | | if (arg.cp.value <= U'\x7F') | 764 | | concatenate(write_pos, buf_end, to_sv(arg.cp)); | 765 | | else | 766 | | { | 767 | | auto val = static_cast<uint_least32_t>(arg.cp.value); | 768 | | const auto digits = val > 0xFFFFu ? 8u : 4u; | 769 | | constexpr auto mask = uint_least32_t{ 0xFu }; | 770 | | char buf[10] = { '\\', digits > 4 ? 'U' : 'u' }; | 771 | | for (auto i = 2u + digits; i-- > 2u;) | 772 | | { | 773 | | const auto hexdig = val & mask; | 774 | | buf[i] = static_cast<char>(hexdig >= 0xAu ? ('A' + (hexdig - 0xAu)) : ('0' + hexdig)); | 775 | | val >>= 4; | 776 | | } | 777 | | concatenate(write_pos, buf_end, std::string_view{ buf, digits + 2u }); | 778 | | } | 779 | | } | 780 | | | 781 | | // all other floats (fallback - coerce to double) | 782 | | else if constexpr (std::is_floating_point_v<arg_type>) | 783 | | concatenate(write_pos, buf_end, static_cast<double>(arg)); | 784 | | | 785 | | // all other integers (fallback - coerce to (u)int64_t) | 786 | | else if constexpr (std::is_arithmetic_v<arg_type> && std::is_integral_v<arg_type>) | 787 | | { | 788 | | using cast_type = std::conditional_t<std::is_unsigned_v<arg_type>, uint64_t, int64_t>; | 789 | | concatenate(write_pos, buf_end, static_cast<cast_type>(arg)); | 790 | | } | 791 | | | 792 | | else | 793 | | { | 794 | | static_assert( | 795 | | impl::always_false<T>, | 796 | | "concatenate() inputs are limited to std::string_views, integers, floats, and escaped_codepoint"); | 797 | | } | 798 | 18.4k | } |
void toml::v3::impl::concatenate<toml::v3::impl::escaped_codepoint>(char*&, char*, toml::v3::impl::escaped_codepoint const&) Line | Count | Source | 715 | 96 | { | 716 | 96 | if TOML_UNLIKELY(write_pos >= buf_end) | 717 | 0 | return; | 718 | | | 719 | 96 | using arg_type = impl::remove_cvref<T>; | 720 | | | 721 | | // string views | 722 | | if constexpr (std::is_same_v<arg_type, std::string_view>) | 723 | | { | 724 | | const auto max_chars = static_cast<size_t>(buf_end - write_pos); | 725 | | const auto len = max_chars < arg.length() ? max_chars : arg.length(); | 726 | | std::memcpy(write_pos, arg.data(), len); | 727 | | write_pos += len; | 728 | | } | 729 | | | 730 | | // doubles | 731 | | else if constexpr (std::is_same_v<arg_type, double>) | 732 | | { | 733 | | #if TOML_FLOAT_CHARCONV | 734 | | const auto result = std::to_chars(write_pos, buf_end, arg); | 735 | | write_pos = result.ptr; | 736 | | #else | 737 | | std::ostringstream ss; | 738 | | ss.imbue(std::locale::classic()); | 739 | | ss.precision(std::numeric_limits<arg_type>::max_digits10); | 740 | | ss << arg; | 741 | | concatenate(write_pos, buf_end, to_sv(std::move(ss).str())); | 742 | | #endif | 743 | | } | 744 | | | 745 | | // 64-bit integers | 746 | | else if constexpr (impl::is_one_of<arg_type, int64_t, uint64_t>) | 747 | | { | 748 | | #if TOML_INT_CHARCONV | 749 | | const auto result = std::to_chars(write_pos, buf_end, arg); | 750 | | write_pos = result.ptr; | 751 | | #else | 752 | | std::ostringstream ss; | 753 | | ss.imbue(std::locale::classic()); | 754 | | using cast_type = std::conditional_t<std::is_signed_v<arg_type>, int64_t, uint64_t>; | 755 | | ss << static_cast<cast_type>(arg); | 756 | | concatenate(write_pos, buf_end, to_sv(std::move(ss).str())); | 757 | | #endif | 758 | | } | 759 | | | 760 | | // escaped_codepoint | 761 | | else if constexpr (std::is_same_v<arg_type, escaped_codepoint>) | 762 | 96 | { | 763 | 96 | if (arg.cp.value <= U'\x7F') | 764 | 21 | concatenate(write_pos, buf_end, 
to_sv(arg.cp)); | 765 | 75 | else | 766 | 75 | { | 767 | 75 | auto val = static_cast<uint_least32_t>(arg.cp.value); | 768 | 75 | const auto digits = val > 0xFFFFu ? 8u : 4u; | 769 | 75 | constexpr auto mask = uint_least32_t{ 0xFu }; | 770 | 75 | char buf[10] = { '\\', digits > 4 ? 'U' : 'u' }; | 771 | 411 | for (auto i = 2u + digits; i-- > 2u;) | 772 | 336 | { | 773 | 336 | const auto hexdig = val & mask; | 774 | 336 | buf[i] = static_cast<char>(hexdig >= 0xAu ? ('A' + (hexdig - 0xAu)) : ('0' + hexdig)); | 775 | 336 | val >>= 4; | 776 | 336 | } | 777 | 75 | concatenate(write_pos, buf_end, std::string_view{ buf, digits + 2u }); | 778 | 75 | } | 779 | | } | 780 | | | 781 | | // all other floats (fallback - coerce to double) | 782 | | else if constexpr (std::is_floating_point_v<arg_type>) | 783 | | concatenate(write_pos, buf_end, static_cast<double>(arg)); | 784 | | | 785 | | // all other integers (fallback - coerce to (u)int64_t) | 786 | | else if constexpr (std::is_arithmetic_v<arg_type> && std::is_integral_v<arg_type>) | 787 | | { | 788 | | using cast_type = std::conditional_t<std::is_unsigned_v<arg_type>, uint64_t, int64_t>; | 789 | | concatenate(write_pos, buf_end, static_cast<cast_type>(arg)); | 790 | | } | 791 | | | 792 | | else | 793 | | { | 794 | | static_assert( | 795 | | impl::always_false<T>, | 796 | | "concatenate() inputs are limited to std::string_views, integers, floats, and escaped_codepoint"); | 797 | | } | 798 | 96 | } |
void toml::v3::impl::concatenate<unsigned long>(char*&, char*, unsigned long const&) Line | Count | Source | 715 | 83 | { | 716 | 83 | if TOML_UNLIKELY(write_pos >= buf_end) | 717 | 0 | return; | 718 | | | 719 | 83 | using arg_type = impl::remove_cvref<T>; | 720 | | | 721 | | // string views | 722 | | if constexpr (std::is_same_v<arg_type, std::string_view>) | 723 | | { | 724 | | const auto max_chars = static_cast<size_t>(buf_end - write_pos); | 725 | | const auto len = max_chars < arg.length() ? max_chars : arg.length(); | 726 | | std::memcpy(write_pos, arg.data(), len); | 727 | | write_pos += len; | 728 | | } | 729 | | | 730 | | // doubles | 731 | | else if constexpr (std::is_same_v<arg_type, double>) | 732 | | { | 733 | | #if TOML_FLOAT_CHARCONV | 734 | | const auto result = std::to_chars(write_pos, buf_end, arg); | 735 | | write_pos = result.ptr; | 736 | | #else | 737 | | std::ostringstream ss; | 738 | | ss.imbue(std::locale::classic()); | 739 | | ss.precision(std::numeric_limits<arg_type>::max_digits10); | 740 | | ss << arg; | 741 | | concatenate(write_pos, buf_end, to_sv(std::move(ss).str())); | 742 | | #endif | 743 | | } | 744 | | | 745 | | // 64-bit integers | 746 | | else if constexpr (impl::is_one_of<arg_type, int64_t, uint64_t>) | 747 | 83 | { | 748 | 83 | #if TOML_INT_CHARCONV | 749 | 83 | const auto result = std::to_chars(write_pos, buf_end, arg); | 750 | 83 | write_pos = result.ptr; | 751 | | #else | 752 | | std::ostringstream ss; | 753 | | ss.imbue(std::locale::classic()); | 754 | | using cast_type = std::conditional_t<std::is_signed_v<arg_type>, int64_t, uint64_t>; | 755 | | ss << static_cast<cast_type>(arg); | 756 | | concatenate(write_pos, buf_end, to_sv(std::move(ss).str())); | 757 | | #endif | 758 | | } | 759 | | | 760 | | // escaped_codepoint | 761 | | else if constexpr (std::is_same_v<arg_type, escaped_codepoint>) | 762 | | { | 763 | | if (arg.cp.value <= U'\x7F') | 764 | | concatenate(write_pos, buf_end, to_sv(arg.cp)); | 765 | | else | 766 | 
| { | 767 | | auto val = static_cast<uint_least32_t>(arg.cp.value); | 768 | | const auto digits = val > 0xFFFFu ? 8u : 4u; | 769 | | constexpr auto mask = uint_least32_t{ 0xFu }; | 770 | | char buf[10] = { '\\', digits > 4 ? 'U' : 'u' }; | 771 | | for (auto i = 2u + digits; i-- > 2u;) | 772 | | { | 773 | | const auto hexdig = val & mask; | 774 | | buf[i] = static_cast<char>(hexdig >= 0xAu ? ('A' + (hexdig - 0xAu)) : ('0' + hexdig)); | 775 | | val >>= 4; | 776 | | } | 777 | | concatenate(write_pos, buf_end, std::string_view{ buf, digits + 2u }); | 778 | | } | 779 | | } | 780 | | | 781 | | // all other floats (fallback - coerce to double) | 782 | | else if constexpr (std::is_floating_point_v<arg_type>) | 783 | | concatenate(write_pos, buf_end, static_cast<double>(arg)); | 784 | | | 785 | | // all other integers (fallback - coerce to (u)int64_t) | 786 | | else if constexpr (std::is_arithmetic_v<arg_type> && std::is_integral_v<arg_type>) | 787 | | { | 788 | | using cast_type = std::conditional_t<std::is_unsigned_v<arg_type>, uint64_t, int64_t>; | 789 | | concatenate(write_pos, buf_end, static_cast<cast_type>(arg)); | 790 | | } | 791 | | | 792 | | else | 793 | | { | 794 | | static_assert( | 795 | | impl::always_false<T>, | 796 | | "concatenate() inputs are limited to std::string_views, integers, floats, and escaped_codepoint"); | 797 | | } | 798 | 83 | } |
void toml::v3::impl::concatenate<unsigned int>(char*&, char*, unsigned int const&) Line | Count | Source | 715 | 40 | { | 716 | 40 | if TOML_UNLIKELY(write_pos >= buf_end) | 717 | 0 | return; | 718 | | | 719 | 40 | using arg_type = impl::remove_cvref<T>; | 720 | | | 721 | | // string views | 722 | | if constexpr (std::is_same_v<arg_type, std::string_view>) | 723 | | { | 724 | | const auto max_chars = static_cast<size_t>(buf_end - write_pos); | 725 | | const auto len = max_chars < arg.length() ? max_chars : arg.length(); | 726 | | std::memcpy(write_pos, arg.data(), len); | 727 | | write_pos += len; | 728 | | } | 729 | | | 730 | | // doubles | 731 | | else if constexpr (std::is_same_v<arg_type, double>) | 732 | | { | 733 | | #if TOML_FLOAT_CHARCONV | 734 | | const auto result = std::to_chars(write_pos, buf_end, arg); | 735 | | write_pos = result.ptr; | 736 | | #else | 737 | | std::ostringstream ss; | 738 | | ss.imbue(std::locale::classic()); | 739 | | ss.precision(std::numeric_limits<arg_type>::max_digits10); | 740 | | ss << arg; | 741 | | concatenate(write_pos, buf_end, to_sv(std::move(ss).str())); | 742 | | #endif | 743 | | } | 744 | | | 745 | | // 64-bit integers | 746 | | else if constexpr (impl::is_one_of<arg_type, int64_t, uint64_t>) | 747 | | { | 748 | | #if TOML_INT_CHARCONV | 749 | | const auto result = std::to_chars(write_pos, buf_end, arg); | 750 | | write_pos = result.ptr; | 751 | | #else | 752 | | std::ostringstream ss; | 753 | | ss.imbue(std::locale::classic()); | 754 | | using cast_type = std::conditional_t<std::is_signed_v<arg_type>, int64_t, uint64_t>; | 755 | | ss << static_cast<cast_type>(arg); | 756 | | concatenate(write_pos, buf_end, to_sv(std::move(ss).str())); | 757 | | #endif | 758 | | } | 759 | | | 760 | | // escaped_codepoint | 761 | | else if constexpr (std::is_same_v<arg_type, escaped_codepoint>) | 762 | | { | 763 | | if (arg.cp.value <= U'\x7F') | 764 | | concatenate(write_pos, buf_end, to_sv(arg.cp)); | 765 | | else | 766 | | { | 767 | | 
auto val = static_cast<uint_least32_t>(arg.cp.value); | 768 | | const auto digits = val > 0xFFFFu ? 8u : 4u; | 769 | | constexpr auto mask = uint_least32_t{ 0xFu }; | 770 | | char buf[10] = { '\\', digits > 4 ? 'U' : 'u' }; | 771 | | for (auto i = 2u + digits; i-- > 2u;) | 772 | | { | 773 | | const auto hexdig = val & mask; | 774 | | buf[i] = static_cast<char>(hexdig >= 0xAu ? ('A' + (hexdig - 0xAu)) : ('0' + hexdig)); | 775 | | val >>= 4; | 776 | | } | 777 | | concatenate(write_pos, buf_end, std::string_view{ buf, digits + 2u }); | 778 | | } | 779 | | } | 780 | | | 781 | | // all other floats (fallback - coerce to double) | 782 | | else if constexpr (std::is_floating_point_v<arg_type>) | 783 | | concatenate(write_pos, buf_end, static_cast<double>(arg)); | 784 | | | 785 | | // all other integers (fallback - coerce to (u)int64_t) | 786 | | else if constexpr (std::is_arithmetic_v<arg_type> && std::is_integral_v<arg_type>) | 787 | 40 | { | 788 | 40 | using cast_type = std::conditional_t<std::is_unsigned_v<arg_type>, uint64_t, int64_t>; | 789 | 40 | concatenate(write_pos, buf_end, static_cast<cast_type>(arg)); | 790 | | } | 791 | | | 792 | | else | 793 | | { | 794 | | static_assert( | 795 | | impl::always_false<T>, | 796 | | "concatenate() inputs are limited to std::string_views, integers, floats, and escaped_codepoint"); | 797 | | } | 798 | 40 | } |
void toml::v3::impl::concatenate<int>(char*&, char*, int const&) Line | Count | Source | 715 | 10 | { | 716 | 10 | if TOML_UNLIKELY(write_pos >= buf_end) | 717 | 0 | return; | 718 | | | 719 | 10 | using arg_type = impl::remove_cvref<T>; | 720 | | | 721 | | // string views | 722 | | if constexpr (std::is_same_v<arg_type, std::string_view>) | 723 | | { | 724 | | const auto max_chars = static_cast<size_t>(buf_end - write_pos); | 725 | | const auto len = max_chars < arg.length() ? max_chars : arg.length(); | 726 | | std::memcpy(write_pos, arg.data(), len); | 727 | | write_pos += len; | 728 | | } | 729 | | | 730 | | // doubles | 731 | | else if constexpr (std::is_same_v<arg_type, double>) | 732 | | { | 733 | | #if TOML_FLOAT_CHARCONV | 734 | | const auto result = std::to_chars(write_pos, buf_end, arg); | 735 | | write_pos = result.ptr; | 736 | | #else | 737 | | std::ostringstream ss; | 738 | | ss.imbue(std::locale::classic()); | 739 | | ss.precision(std::numeric_limits<arg_type>::max_digits10); | 740 | | ss << arg; | 741 | | concatenate(write_pos, buf_end, to_sv(std::move(ss).str())); | 742 | | #endif | 743 | | } | 744 | | | 745 | | // 64-bit integers | 746 | | else if constexpr (impl::is_one_of<arg_type, int64_t, uint64_t>) | 747 | | { | 748 | | #if TOML_INT_CHARCONV | 749 | | const auto result = std::to_chars(write_pos, buf_end, arg); | 750 | | write_pos = result.ptr; | 751 | | #else | 752 | | std::ostringstream ss; | 753 | | ss.imbue(std::locale::classic()); | 754 | | using cast_type = std::conditional_t<std::is_signed_v<arg_type>, int64_t, uint64_t>; | 755 | | ss << static_cast<cast_type>(arg); | 756 | | concatenate(write_pos, buf_end, to_sv(std::move(ss).str())); | 757 | | #endif | 758 | | } | 759 | | | 760 | | // escaped_codepoint | 761 | | else if constexpr (std::is_same_v<arg_type, escaped_codepoint>) | 762 | | { | 763 | | if (arg.cp.value <= U'\x7F') | 764 | | concatenate(write_pos, buf_end, to_sv(arg.cp)); | 765 | | else | 766 | | { | 767 | | auto val = 
static_cast<uint_least32_t>(arg.cp.value); | 768 | | const auto digits = val > 0xFFFFu ? 8u : 4u; | 769 | | constexpr auto mask = uint_least32_t{ 0xFu }; | 770 | | char buf[10] = { '\\', digits > 4 ? 'U' : 'u' }; | 771 | | for (auto i = 2u + digits; i-- > 2u;) | 772 | | { | 773 | | const auto hexdig = val & mask; | 774 | | buf[i] = static_cast<char>(hexdig >= 0xAu ? ('A' + (hexdig - 0xAu)) : ('0' + hexdig)); | 775 | | val >>= 4; | 776 | | } | 777 | | concatenate(write_pos, buf_end, std::string_view{ buf, digits + 2u }); | 778 | | } | 779 | | } | 780 | | | 781 | | // all other floats (fallback - coerce to double) | 782 | | else if constexpr (std::is_floating_point_v<arg_type>) | 783 | | concatenate(write_pos, buf_end, static_cast<double>(arg)); | 784 | | | 785 | | // all other integers (fallback - coerce to (u)int64_t) | 786 | | else if constexpr (std::is_arithmetic_v<arg_type> && std::is_integral_v<arg_type>) | 787 | 10 | { | 788 | 10 | using cast_type = std::conditional_t<std::is_unsigned_v<arg_type>, uint64_t, int64_t>; | 789 | 10 | concatenate(write_pos, buf_end, static_cast<cast_type>(arg)); | 790 | | } | 791 | | | 792 | | else | 793 | | { | 794 | | static_assert( | 795 | | impl::always_false<T>, | 796 | | "concatenate() inputs are limited to std::string_views, integers, floats, and escaped_codepoint"); | 797 | | } | 798 | 10 | } |
void toml::v3::impl::concatenate<long>(char*&, char*, long const&) Line | Count | Source | 715 | 10 | { | 716 | 10 | if TOML_UNLIKELY(write_pos >= buf_end) | 717 | 0 | return; | 718 | | | 719 | 10 | using arg_type = impl::remove_cvref<T>; | 720 | | | 721 | | // string views | 722 | | if constexpr (std::is_same_v<arg_type, std::string_view>) | 723 | | { | 724 | | const auto max_chars = static_cast<size_t>(buf_end - write_pos); | 725 | | const auto len = max_chars < arg.length() ? max_chars : arg.length(); | 726 | | std::memcpy(write_pos, arg.data(), len); | 727 | | write_pos += len; | 728 | | } | 729 | | | 730 | | // doubles | 731 | | else if constexpr (std::is_same_v<arg_type, double>) | 732 | | { | 733 | | #if TOML_FLOAT_CHARCONV | 734 | | const auto result = std::to_chars(write_pos, buf_end, arg); | 735 | | write_pos = result.ptr; | 736 | | #else | 737 | | std::ostringstream ss; | 738 | | ss.imbue(std::locale::classic()); | 739 | | ss.precision(std::numeric_limits<arg_type>::max_digits10); | 740 | | ss << arg; | 741 | | concatenate(write_pos, buf_end, to_sv(std::move(ss).str())); | 742 | | #endif | 743 | | } | 744 | | | 745 | | // 64-bit integers | 746 | | else if constexpr (impl::is_one_of<arg_type, int64_t, uint64_t>) | 747 | 10 | { | 748 | 10 | #if TOML_INT_CHARCONV | 749 | 10 | const auto result = std::to_chars(write_pos, buf_end, arg); | 750 | 10 | write_pos = result.ptr; | 751 | | #else | 752 | | std::ostringstream ss; | 753 | | ss.imbue(std::locale::classic()); | 754 | | using cast_type = std::conditional_t<std::is_signed_v<arg_type>, int64_t, uint64_t>; | 755 | | ss << static_cast<cast_type>(arg); | 756 | | concatenate(write_pos, buf_end, to_sv(std::move(ss).str())); | 757 | | #endif | 758 | | } | 759 | | | 760 | | // escaped_codepoint | 761 | | else if constexpr (std::is_same_v<arg_type, escaped_codepoint>) | 762 | | { | 763 | | if (arg.cp.value <= U'\x7F') | 764 | | concatenate(write_pos, buf_end, to_sv(arg.cp)); | 765 | | else | 766 | | { | 767 | | 
auto val = static_cast<uint_least32_t>(arg.cp.value); | 768 | | const auto digits = val > 0xFFFFu ? 8u : 4u; | 769 | | constexpr auto mask = uint_least32_t{ 0xFu }; | 770 | | char buf[10] = { '\\', digits > 4 ? 'U' : 'u' }; | 771 | | for (auto i = 2u + digits; i-- > 2u;) | 772 | | { | 773 | | const auto hexdig = val & mask; | 774 | | buf[i] = static_cast<char>(hexdig >= 0xAu ? ('A' + (hexdig - 0xAu)) : ('0' + hexdig)); | 775 | | val >>= 4; | 776 | | } | 777 | | concatenate(write_pos, buf_end, std::string_view{ buf, digits + 2u }); | 778 | | } | 779 | | } | 780 | | | 781 | | // all other floats (fallback - coerce to double) | 782 | | else if constexpr (std::is_floating_point_v<arg_type>) | 783 | | concatenate(write_pos, buf_end, static_cast<double>(arg)); | 784 | | | 785 | | // all other integers (fallback - coerce to (u)int64_t) | 786 | | else if constexpr (std::is_arithmetic_v<arg_type> && std::is_integral_v<arg_type>) | 787 | | { | 788 | | using cast_type = std::conditional_t<std::is_unsigned_v<arg_type>, uint64_t, int64_t>; | 789 | | concatenate(write_pos, buf_end, static_cast<cast_type>(arg)); | 790 | | } | 791 | | | 792 | | else | 793 | | { | 794 | | static_assert( | 795 | | impl::always_false<T>, | 796 | | "concatenate() inputs are limited to std::string_views, integers, floats, and escaped_codepoint"); | 797 | | } | 798 | 10 | } |
|
799 | | |
800 | | struct error_builder // Assembles a parse-error message in a fixed-size char buffer and hands it to parse_error via finish().
801 | | {
802 | | static constexpr std::size_t buf_size = 512; // total capacity of buf, in chars
803 | | char buf[buf_size];
804 | | char* write_pos = buf; // next position to write to; starts at the beginning of buf
805 | | char* const max_write_pos = buf + (buf_size - std::size_t{ 1 }); // allow for null terminator
806 | | 
807 | | TOML_NODISCARD_CTOR
808 | | error_builder(std::string_view scope) noexcept // Seeds the buffer with the prefix "Error while parsing <scope>: ".
809 | 3.56k | {
810 | 3.56k | concatenate(write_pos, max_write_pos, "Error while parsing "sv);
811 | 3.56k | concatenate(write_pos, max_write_pos, scope);
812 | 3.56k | concatenate(write_pos, max_write_pos, ": "sv);
813 | 3.56k | }
814 | | 
815 | | template <typename T>
816 | | void append(const T& arg) noexcept // Appends one message fragment via concatenate(), passing max_write_pos as the end-of-buffer limit.
817 | 7.86k | {
818 | 7.86k | concatenate(write_pos, max_write_pos, arg);
819 | 7.86k | } void toml::v3::impl::error_builder::append<std::__1::basic_string_view<char, std::__1::char_traits<char> > >(std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) Line | Count | Source | 817 | 7.67k | { | 818 | 7.67k | concatenate(write_pos, max_write_pos, arg); | 819 | 7.67k | } |
void toml::v3::impl::error_builder::append<toml::v3::impl::escaped_codepoint>(toml::v3::impl::escaped_codepoint const&) Line | Count | Source | 817 | 96 | { | 818 | 96 | concatenate(write_pos, max_write_pos, arg); | 819 | 96 | } |
void toml::v3::impl::error_builder::append<unsigned long>(unsigned long const&) Line | Count | Source | 817 | 43 | { | 818 | 43 | concatenate(write_pos, max_write_pos, arg); | 819 | 43 | } |
void toml::v3::impl::error_builder::append<unsigned int>(unsigned int const&) Line | Count | Source | 817 | 40 | { | 818 | 40 | concatenate(write_pos, max_write_pos, arg); | 819 | 40 | } |
void toml::v3::impl::error_builder::append<int>(int const&) Line | Count | Source | 817 | 10 | { | 818 | 10 | concatenate(write_pos, max_write_pos, arg); | 819 | 10 | } |

820 | | 
821 | | TOML_RETURNS_BY_THROWING
822 | | auto finish(const source_position& pos, const source_path_ptr& source_path) const // Null-terminates the message, then throws a parse_error (TOML_EXCEPTIONS) or returns one by value (otherwise).
823 | 3.56k | {
824 | 3.56k | *write_pos = '\0'; // stays inside buf as long as write_pos <= max_write_pos, which is one short of the end of buf
825 | | 
826 | 3.56k | #if TOML_EXCEPTIONS
827 | 3.56k | throw parse_error{ buf, pos, source_path };
828 | | #else
829 | | return parse_error{ std::string(buf, static_cast<size_t>(write_pos - buf)), pos, source_path };
830 | | #endif
831 | 3.56k | }
832 | | 
833 | | TOML_DELETE_DEFAULTS(error_builder);
834 | | };
835 | | |
836 | | struct parse_scope // Scope guard: overwrites a scope-name string_view on construction and restores its prior value on destruction.
837 | | {
838 | | std::string_view& storage_; // the scope variable being overridden (held by reference)
839 | | std::string_view parent_; // value the scope variable had before this guard was constructed
840 | | 
841 | | TOML_NODISCARD_CTOR
842 | | explicit parse_scope(std::string_view& current_scope, std::string_view new_scope) noexcept
843 | 1.40M | : storage_{ current_scope },
844 | 1.40M | parent_{ current_scope }
845 | 1.40M | {
846 | 1.40M | storage_ = new_scope; // install the new scope name
847 | 1.40M | }
848 | | 
849 | | ~parse_scope() noexcept
850 | 1.40M | {
851 | 1.40M | storage_ = parent_; // put the enclosing scope name back
852 | 1.40M | }
853 | | 
854 | | TOML_DELETE_DEFAULTS(parse_scope);
855 | | };
856 | 1.40M | #define push_parse_scope_2(scope, line) parse_scope ps_##line(current_scope, scope) // declares the guard; expects a variable named current_scope to be visible at the expansion site
857 | 1.40M | #define push_parse_scope_1(scope, line) push_parse_scope_2(scope, line) // extra level of indirection so __LINE__ is expanded before ## pastes it
858 | 1.40M | #define push_parse_scope(scope) push_parse_scope_1(scope, __LINE__) // entry point: declares a parse_scope named ps_<line number>
859 | | |
860 | | struct parse_key_buffer // Stores key segments back-to-back in one string, with parallel vectors describing each segment.
861 | | {
862 | | std::string buffer; // concatenated text of all segments
863 | | std::vector<std::pair<size_t, size_t>> segments; // (offset into buffer, length) for each segment
864 | | std::vector<source_position> starts; // the b argument given to push_back(), one per segment
865 | | std::vector<source_position> ends; // the e argument given to push_back(), one per segment
866 | | 
867 | | void clear() noexcept // Empties all four containers.
868 | 208k | {
869 | 208k | buffer.clear();
870 | 208k | segments.clear();
871 | 208k | starts.clear();
872 | 208k | ends.clear();
873 | 208k | }
874 | | 
875 | | void push_back(std::string_view segment, source_position b, source_position e) // Appends the text of one segment and records its offset, length and source positions.
876 | 1.15M | {
877 | 1.15M | segments.push_back({ buffer.length(), segment.length() }); // the offset has to be taken before the text is appended
878 | 1.15M | buffer.append(segment);
879 | 1.15M | starts.push_back(b);
880 | 1.15M | ends.push_back(e);
881 | 1.15M | }
882 | | 
883 | | TOML_PURE_INLINE_GETTER
884 | | std::string_view operator[](size_t i) const noexcept // View of segment i, pointing into buffer; i is not bounds-checked.
885 | 2.00M | {
886 | 2.00M | return std::string_view{ buffer.c_str() + segments[i].first, segments[i].second };
887 | 2.00M | }
888 | | 
889 | | TOML_PURE_INLINE_GETTER
890 | | std::string_view back() const noexcept // Last segment; with no segments the index wraps around, so only call this when !empty().
891 | 207k | {
892 | 207k | return (*this)[segments.size() - 1u];
893 | 207k | }
894 | | 
895 | | TOML_PURE_INLINE_GETTER
896 | | bool empty() const noexcept // True when no segments are stored.
897 | 14 | {
898 | 14 | return segments.empty();
899 | 14 | }
900 | | 
901 | | TOML_PURE_INLINE_GETTER
902 | | size_t size() const noexcept // Number of stored segments (not the length of buffer).
903 | 2.17M | {
904 | 2.17M | return segments.size();
905 | 2.17M | }
906 | | };
907 | | |
908 | | struct depth_counter_scope // Scope guard: increments the referenced counter on construction and decrements it on destruction.
909 | | {
910 | | size_t& depth_; // the counter being adjusted (held by reference)
911 | | 
912 | | TOML_NODISCARD_CTOR
913 | | explicit depth_counter_scope(size_t& depth) noexcept //
914 | 862k | : depth_{ depth }
915 | 862k | {
916 | 862k | depth_++; // entering one more level
917 | 862k | }
918 | | 
919 | | ~depth_counter_scope() noexcept
920 | 862k | {
921 | 862k | depth_--; // leaving that level again
922 | 862k | }
923 | | 
924 | | TOML_DELETE_DEFAULTS(depth_counter_scope);
925 | | };
926 | | |
927 | | struct parsed_string // Plain aggregate pairing a string view with a multi-line flag.
928 | | {
929 | | std::string_view value; // non-owning view of the characters; the storage lives elsewhere
930 | | bool was_multi_line; // NOTE(review): presumably set when the string used the multi-line syntax - confirm against the code that fills it in
931 | | };
932 | | |
933 | | struct table_vector_scope // Scope guard: pushes a table pointer onto a vector on construction and pops the last element on destruction.
934 | | {
935 | | std::vector<table*>& tables; // the vector being pushed to and popped from (held by reference)
936 | | 
937 | | TOML_NODISCARD_CTOR
938 | | explicit table_vector_scope(std::vector<table*>& tables_, table& tbl) //
939 | 6.99k | : tables{ tables_ }
940 | 6.99k | {
941 | 6.99k | tables.push_back(&tbl); // may allocate; note this constructor is not noexcept
942 | 6.99k | }
943 | | 
944 | | ~table_vector_scope() noexcept
945 | 6.99k | {
946 | 6.99k | tables.pop_back(); // assumes the last element is still the one this guard pushed - TODO confirm no caller leaves the vector unbalanced
947 | 6.99k | }
948 | | 
949 | | TOML_DELETE_DEFAULTS(table_vector_scope);
950 | | };
951 | | } |
952 | | TOML_ANON_NAMESPACE_END; |
953 | | |
954 | | #if 1 // parser helper macros |
955 | | |
956 | | // Q: "what the fuck is this? MACROS????" |
957 | | // A: The parser needs to work in exceptionless mode (returning error objects directly) |
958 | | // and exception mode (reporting parse failures by throwing). Two totally different control flows. |
959 | | // These macros encapsulate the differences between the two modes so I can write code
960 | | // as though I was only targeting one mode and not want to yeet myself into the sun.
961 | | // They're all #undef'd at the bottom of the parser's implementation so they should be harmless outside |
962 | | // of toml++. |
963 | | |
964 | 97.4M | #define is_eof() !cp // true when the current codepoint pointer cp is null
965 | 96.9M | #define assert_not_eof() TOML_ASSERT_ASSUME(cp != nullptr) // checks via TOML_ASSERT_ASSUME that cp is non-null
966 | | #define return_if_eof(...) /* returns __VA_ARGS__ from the enclosing function when at EOF */ \
967 | 11.0M | do \
968 | 11.0M | { \
969 | 11.0M | if TOML_UNLIKELY(is_eof()) \
970 | 11.0M | return __VA_ARGS__; \
971 | 11.0M | } \
972 | 11.0M | while (false)
973 | | 
974 | | #if TOML_EXCEPTIONS // exception mode: errors are thrown, so there is no stored error state to test
975 | 2.83M | #define is_error() false
976 | 3.41k | #define return_after_error(...) TOML_UNREACHABLE // reporting an error throws in this mode, so control is not expected to get here
977 | 7.01k | #define assert_not_error() static_assert(true) // no-op that still needs a trailing semicolon
978 | 127M | #define return_if_error(...) static_assert(true) // no-op that still needs a trailing semicolon
979 | 11.0M | #define return_if_error_or_eof(...) return_if_eof(__VA_ARGS__) // only the EOF half can happen in this mode
980 | | #else // exceptionless mode: the error lives in a variable named err at the expansion site
981 | | #define is_error() !!err
982 | | #define return_after_error(...) return __VA_ARGS__
983 | | #define assert_not_error() TOML_ASSERT(!is_error())
984 | | #define return_if_error(...) /* returns __VA_ARGS__ from the enclosing function when an error is already set */ \
985 | | do \
986 | | { \
987 | | if TOML_UNLIKELY(is_error()) \
988 | | return __VA_ARGS__; \
989 | | } \
990 | | while (false)
991 | | #define return_if_error_or_eof(...) /* returns __VA_ARGS__ when at EOF or when an error is already set */ \
992 | | do \
993 | | { \
994 | | if TOML_UNLIKELY(is_eof() || is_error()) \
995 | | return __VA_ARGS__; \
996 | | } \
997 | | while (false)
998 | | #endif
999 | | |
1000 | | #if defined(TOML_BREAK_AT_PARSE_ERRORS) && TOML_BREAK_AT_PARSE_ERRORS // opt-in: make parse_error_break() stop in the debugger
1001 | | #if defined(__has_builtin)
1002 | | #if __has_builtin(__builtin_debugtrap)
1003 | | #define parse_error_break() __builtin_debugtrap()
1004 | | #elif __has_builtin(__debugbreak)
1005 | | #define parse_error_break() __debugbreak()
1006 | | #endif
1007 | | #endif
1008 | | #ifndef parse_error_break // neither builtin was detected above
1009 | | #if TOML_MSVC || TOML_ICC
1010 | | #define parse_error_break() __debugbreak()
1011 | | #else
1012 | | #define parse_error_break() TOML_ASSERT(false) // last resort: a failing assertion
1013 | | #endif
1014 | | #endif
1015 | | #else
1016 | 3.56k | #define parse_error_break() static_assert(true) // disabled: no-op that still needs a trailing semicolon
1017 | | #endif
1018 | | |
1019 | | #define set_error_and_return(ret, ...) /* reports an error built from the __VA_ARGS__ fragments unless one is already set, then leaves via return_after_error(ret) */ \
1020 | 3.07k | do \
1021 | 3.07k | { \
1022 | 3.07k | if (!is_error()) \
1023 | 3.07k | set_error(__VA_ARGS__); \
1024 | 3.07k | return_after_error(ret); \
1025 | 3.07k | } \
1026 | 3.07k | while (false)
1027 | | 
1028 | 2.47k | #define set_error_and_return_default(...) set_error_and_return({}, __VA_ARGS__) // same, with {} as the return value
1029 | | 
1030 | | #define set_error_and_return_if_eof(...) /* at EOF, reports "encountered end-of-file"; here __VA_ARGS__ is the value to return, not message text */ \
1031 | 5.76M | do \
1032 | 5.76M | { \
1033 | 5.76M | if TOML_UNLIKELY(is_eof()) \
1034 | 5.76M | set_error_and_return(__VA_ARGS__, "encountered end-of-file"sv); \
1035 | 5.76M | } \
1036 | 5.76M | while (false)
1037 | | 
1038 | | #define advance_and_return_if_error(...) /* advance() one codepoint (must not already be at EOF), then return __VA_ARGS__ if that left an error */ \
1039 | 43.9M | do \
1040 | 43.9M | { \
1041 | 43.9M | assert_not_eof(); \
1042 | 43.9M | advance(); \
1043 | 43.9M | return_if_error(__VA_ARGS__); \
1044 | 43.9M | } \
1045 | 43.9M | while (false)
1046 | | 
1047 | | #define advance_and_return_if_error_or_eof(...) /* as advance_and_return_if_error, but reaching EOF after the advance is also reported as an error */ \
1048 | 2.40M | do \
1049 | 2.40M | { \
1050 | 2.40M | assert_not_eof(); \
1051 | 2.40M | advance(); \
1052 | 2.40M | return_if_error(__VA_ARGS__); \
1053 | 2.40M | set_error_and_return_if_eof(__VA_ARGS__); \
1054 | 2.40M | } \
1055 | 2.40M | while (false)
1056 | | |
1057 | | #endif // parser helper macros |
1058 | | |
1059 | | TOML_IMPL_NAMESPACE_START |
1060 | | { |
1061 | | TOML_ABI_NAMESPACE_BOOL(TOML_EXCEPTIONS, impl_ex, impl_noex); |
1062 | | |
1063 | | class parser |
1064 | | { |
1065 | | private: |
1066 | | static constexpr size_t max_nested_values = TOML_MAX_NESTED_VALUES; // limit taken from the TOML_MAX_NESTED_VALUES configuration macro
1067 | | static constexpr size_t max_dotted_keys_depth = TOML_MAX_DOTTED_KEYS_DEPTH; // limit taken from the TOML_MAX_DOTTED_KEYS_DEPTH configuration macro
1068 | | 
1069 | | utf8_buffered_reader reader; // source of codepoints; driven by advance() and go_back()
1070 | | table root;
1071 | | source_position prev_pos = { 1, 1 }; // position saved by advance() before it reads on; current_position() falls back to it at EOF
1072 | | const utf8_codepoint* cp = {}; // current codepoint; null means EOF (see is_eof())
1073 | | std::vector<table*> implicit_tables;
1074 | | std::vector<table*> dotted_key_tables;
1075 | | std::vector<table*> open_inline_tables;
1076 | | std::vector<array*> table_arrays;
1077 | | parse_key_buffer key_buffer;
1078 | | std::string string_buffer;
1079 | | std::string recording_buffer; // for diagnostics
1080 | | bool recording = false, recording_whitespace = true; // recording: advance() copies each new codepoint into recording_buffer; recording_whitespace: whether whitespace is copied too
1081 | | std::string_view current_scope; // name of what is being parsed; prefixed to error messages and swapped by parse_scope guards
1082 | | size_t nested_values = {}; // NOTE(review): presumably the current nesting depth checked against max_nested_values - confirm at the use sites
1083 | | #if !TOML_EXCEPTIONS
1084 | | mutable optional<parse_error> err; // error recorded in exceptionless mode; mutable so the const set_error_at() can fill it
1085 | | #endif
1086 | | |
1087 | | TOML_NODISCARD
1088 | | source_position current_position(source_index fallback_offset = 0) const noexcept // Position of the current codepoint; at EOF, prev_pos with its column shifted by fallback_offset.
1089 | 3.34M | {
1090 | 3.34M | if (!is_eof())
1091 | 3.34M | return cp->position;
1092 | 7.91k | return { prev_pos.line, static_cast<source_index>(prev_pos.column + fallback_offset) }; // EOF: there is no codepoint to ask, so derive a position from the last one seen
1093 | 3.34M | }
1094 | | |
1095 | | template <typename... T>
1096 | | TOML_RETURNS_BY_THROWING
1097 | | TOML_NEVER_INLINE
1098 | | void set_error_at(source_position pos, const T&... reason) const // Builds "Error while parsing <current_scope>: <reason...>" for position pos; thrown with TOML_EXCEPTIONS, otherwise stored in err.
1099 | 3.56k | {
1100 | 3.56k | static_assert(sizeof...(T) > 0);
1101 | 3.56k | return_if_error(); // exceptionless mode: an error that is already set is kept and this call does nothing
1102 | | 
1103 | 3.56k | error_builder builder{ current_scope };
1104 | 3.56k | (builder.append(reason), ...); // fold expression: appends every reason fragment in argument order
1105 | | 
1106 | 3.56k | parse_error_break(); // optional debugger stop before the error is reported
1107 | | 
1108 | 3.56k | #if TOML_EXCEPTIONS
1109 | 3.56k | builder.finish(pos, reader.source_path());
1110 | | #else
1111 | | err.emplace(builder.finish(pos, reader.source_path()));
1112 | | #endif
1113 | 3.56k | } void toml::v3::impl::impl_ex::parser::set_error_at<std::__1::basic_string_view<char, std::__1::char_traits<char> >, toml::v3::impl::escaped_codepoint, std::__1::basic_string_view<char, std::__1::char_traits<char> > >(toml::v3::source_position, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, toml::v3::impl::escaped_codepoint const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const Line | Count | Source | 1099 | 96 | { | 1100 | 96 | static_assert(sizeof...(T) > 0); | 1101 | 96 | return_if_error(); | 1102 | | | 1103 | 96 | error_builder builder{ current_scope }; | 1104 | 96 | (builder.append(reason), ...); | 1105 | | | 1106 | 96 | parse_error_break(); | 1107 | | | 1108 | 96 | #if TOML_EXCEPTIONS | 1109 | 96 | builder.finish(pos, reader.source_path()); | 1110 | | #else | 1111 | | err.emplace(builder.finish(pos, reader.source_path())); | 1112 | | #endif | 1113 | 96 | } |
void toml::v3::impl::impl_ex::parser::set_error_at<std::__1::basic_string_view<char, std::__1::char_traits<char> > >(toml::v3::source_position, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const Line | Count | Source | 1099 | 1.55k | { | 1100 | 1.55k | static_assert(sizeof...(T) > 0); | 1101 | 1.55k | return_if_error(); | 1102 | | | 1103 | 1.55k | error_builder builder{ current_scope }; | 1104 | 1.55k | (builder.append(reason), ...); | 1105 | | | 1106 | 1.55k | parse_error_break(); | 1107 | | | 1108 | 1.55k | #if TOML_EXCEPTIONS | 1109 | 1.55k | builder.finish(pos, reader.source_path()); | 1110 | | #else | 1111 | | err.emplace(builder.finish(pos, reader.source_path())); | 1112 | | #endif | 1113 | 1.55k | } |
void toml::v3::impl::impl_ex::parser::set_error_at<std::__1::basic_string_view<char, std::__1::char_traits<char> >, unsigned long, std::__1::basic_string_view<char, std::__1::char_traits<char> > >(toml::v3::source_position, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, unsigned long const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const Line | Count | Source | 1099 | 38 | { | 1100 | 38 | static_assert(sizeof...(T) > 0); | 1101 | 38 | return_if_error(); | 1102 | | | 1103 | 38 | error_builder builder{ current_scope }; | 1104 | 38 | (builder.append(reason), ...); | 1105 | | | 1106 | 38 | parse_error_break(); | 1107 | | | 1108 | 38 | #if TOML_EXCEPTIONS | 1109 | 38 | builder.finish(pos, reader.source_path()); | 1110 | | #else | 1111 | | err.emplace(builder.finish(pos, reader.source_path())); | 1112 | | #endif | 1113 | 38 | } |
void toml::v3::impl::impl_ex::parser::set_error_at<std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> > >(toml::v3::source_position, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const Line | Count | Source | 1099 | 1.65k | { | 1100 | 1.65k | static_assert(sizeof...(T) > 0); | 1101 | 1.65k | return_if_error(); | 1102 | | | 1103 | 1.65k | error_builder builder{ current_scope }; | 1104 | 1.65k | (builder.append(reason), ...); | 1105 | | | 1106 | 1.65k | parse_error_break(); | 1107 | | | 1108 | 1.65k | #if TOML_EXCEPTIONS | 1109 | 1.65k | builder.finish(pos, reader.source_path()); | 1110 | | #else | 1111 | | err.emplace(builder.finish(pos, reader.source_path())); | 1112 | | #endif | 1113 | 1.65k | } |
void toml::v3::impl::impl_ex::parser::set_error_at<std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> > >(toml::v3::source_position, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const Line | Count | Source | 1099 | 30 | { | 1100 | 30 | static_assert(sizeof...(T) > 0); | 1101 | 30 | return_if_error(); | 1102 | | | 1103 | 30 | error_builder builder{ current_scope }; | 1104 | 30 | (builder.append(reason), ...); | 1105 | | | 1106 | 30 | parse_error_break(); | 1107 | | | 1108 | 30 | #if TOML_EXCEPTIONS | 1109 | 30 | builder.finish(pos, reader.source_path()); | 1110 | | #else | 1111 | | err.emplace(builder.finish(pos, reader.source_path())); | 1112 | | #endif | 1113 | 30 | } |
void toml::v3::impl::impl_ex::parser::set_error_at<std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> > >(toml::v3::source_position, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const Line | Count | Source | 1099 | 66 | { | 1100 | 66 | static_assert(sizeof...(T) > 0); | 1101 | 66 | return_if_error(); | 1102 | | | 1103 | 66 | error_builder builder{ current_scope }; | 1104 | 66 | (builder.append(reason), ...); | 1105 | | | 1106 | 66 | parse_error_break(); | 1107 | | | 1108 | 66 | #if TOML_EXCEPTIONS | 1109 | 66 | builder.finish(pos, reader.source_path()); | 1110 | | #else | 1111 | | err.emplace(builder.finish(pos, reader.source_path())); | 1112 | | #endif | 1113 | 66 | } |
void toml::v3::impl::impl_ex::parser::set_error_at<std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> > >(toml::v3::source_position, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const Line | Count | Source | 1099 | 78 | { | 1100 | 78 | static_assert(sizeof...(T) > 0); | 1101 | 78 | return_if_error(); | 1102 | | | 1103 | 78 | error_builder builder{ current_scope }; | 1104 | 78 | (builder.append(reason), ...); | 1105 | | | 1106 | 78 | parse_error_break(); | 1107 | | | 1108 | 78 | #if TOML_EXCEPTIONS | 1109 | 78 | builder.finish(pos, reader.source_path()); | 1110 | | #else | 1111 | | err.emplace(builder.finish(pos, reader.source_path())); | 1112 | | #endif | 1113 | 78 | } |
void toml::v3::impl::impl_ex::parser::set_error_at<std::__1::basic_string_view<char, std::__1::char_traits<char> >, unsigned long, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> > >(toml::v3::source_position, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, unsigned long const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const Line | Count | Source | 1099 | 2 | { | 1100 | 2 | static_assert(sizeof...(T) > 0); | 1101 | 2 | return_if_error(); | 1102 | | | 1103 | 2 | error_builder builder{ current_scope }; | 1104 | 2 | (builder.append(reason), ...); | 1105 | | | 1106 | 2 | parse_error_break(); | 1107 | | | 1108 | 2 | #if TOML_EXCEPTIONS | 1109 | 2 | builder.finish(pos, reader.source_path()); | 1110 | | #else | 1111 | | err.emplace(builder.finish(pos, reader.source_path())); | 1112 | | #endif | 1113 | 2 | } |
void toml::v3::impl::impl_ex::parser::set_error_at<std::__1::basic_string_view<char, std::__1::char_traits<char> >, unsigned int>(toml::v3::source_position, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, unsigned int const&) const Line | Count | Source | 1099 | 24 | { | 1100 | 24 | static_assert(sizeof...(T) > 0); | 1101 | 24 | return_if_error(); | 1102 | | | 1103 | 24 | error_builder builder{ current_scope }; | 1104 | 24 | (builder.append(reason), ...); | 1105 | | | 1106 | 24 | parse_error_break(); | 1107 | | | 1108 | 24 | #if TOML_EXCEPTIONS | 1109 | 24 | builder.finish(pos, reader.source_path()); | 1110 | | #else | 1111 | | err.emplace(builder.finish(pos, reader.source_path())); | 1112 | | #endif | 1113 | 24 | } |
void toml::v3::impl::impl_ex::parser::set_error_at<std::__1::basic_string_view<char, std::__1::char_traits<char> >, unsigned long>(toml::v3::source_position, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, unsigned long const&) const Line | Count | Source | 1099 | 3 | { | 1100 | 3 | static_assert(sizeof...(T) > 0); | 1101 | 3 | return_if_error(); | 1102 | | | 1103 | 3 | error_builder builder{ current_scope }; | 1104 | 3 | (builder.append(reason), ...); | 1105 | | | 1106 | 3 | parse_error_break(); | 1107 | | | 1108 | 3 | #if TOML_EXCEPTIONS | 1109 | 3 | builder.finish(pos, reader.source_path()); | 1110 | | #else | 1111 | | err.emplace(builder.finish(pos, reader.source_path())); | 1112 | | #endif | 1113 | 3 | } |
void toml::v3::impl::impl_ex::parser::set_error_at<std::__1::basic_string_view<char, std::__1::char_traits<char> >, unsigned int, std::__1::basic_string_view<char, std::__1::char_traits<char> >, unsigned int>(toml::v3::source_position, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, unsigned int const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, unsigned int const&) const Line | Count | Source | 1099 | 8 | { | 1100 | 8 | static_assert(sizeof...(T) > 0); | 1101 | 8 | return_if_error(); | 1102 | | | 1103 | 8 | error_builder builder{ current_scope }; | 1104 | 8 | (builder.append(reason), ...); | 1105 | | | 1106 | 8 | parse_error_break(); | 1107 | | | 1108 | 8 | #if TOML_EXCEPTIONS | 1109 | 8 | builder.finish(pos, reader.source_path()); | 1110 | | #else | 1111 | | err.emplace(builder.finish(pos, reader.source_path())); | 1112 | | #endif | 1113 | 8 | } |
void toml::v3::impl::impl_ex::parser::set_error_at<std::__1::basic_string_view<char, std::__1::char_traits<char> >, int>(toml::v3::source_position, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, int const&) const Line | Count | Source | 1099 | 10 | { | 1100 | 10 | static_assert(sizeof...(T) > 0); | 1101 | 10 | return_if_error(); | 1102 | | | 1103 | 10 | error_builder builder{ current_scope }; | 1104 | 10 | (builder.append(reason), ...); | 1105 | | | 1106 | 10 | parse_error_break(); | 1107 | | | 1108 | 10 | #if TOML_EXCEPTIONS | 1109 | 10 | builder.finish(pos, reader.source_path()); | 1110 | | #else | 1111 | | err.emplace(builder.finish(pos, reader.source_path())); | 1112 | | #endif | 1113 | 10 | } |
|
1114 | | |
1115 | | template <typename... T>
1116 | | TOML_RETURNS_BY_THROWING
1117 | | void set_error(const T&... reason) const // Reports an error at the current position; at EOF that is one column past prev_pos.
1118 | 3.23k | {
1119 | 3.23k | set_error_at(current_position(1), reason...);
1120 | 3.23k | } void toml::v3::impl::impl_ex::parser::set_error<std::__1::basic_string_view<char, std::__1::char_traits<char> >, toml::v3::impl::escaped_codepoint, std::__1::basic_string_view<char, std::__1::char_traits<char> > >(std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, toml::v3::impl::escaped_codepoint const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const Line | Count | Source | 1118 | 96 | { | 1119 | 96 | set_error_at(current_position(1), reason...); | 1120 | 96 | } |
void toml::v3::impl::impl_ex::parser::set_error<std::__1::basic_string_view<char, std::__1::char_traits<char> > >(std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const Line | Count | Source | 1118 | 1.28k | { | 1119 | 1.28k | set_error_at(current_position(1), reason...); | 1120 | 1.28k | } |
void toml::v3::impl::impl_ex::parser::set_error<std::__1::basic_string_view<char, std::__1::char_traits<char> >, unsigned long, std::__1::basic_string_view<char, std::__1::char_traits<char> > >(std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, unsigned long const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const Line | Count | Source | 1118 | 38 | { | 1119 | 38 | set_error_at(current_position(1), reason...); | 1120 | 38 | } |
void toml::v3::impl::impl_ex::parser::set_error<std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> > >(std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const Line | Count | Source | 1118 | 16 | { | 1119 | 16 | set_error_at(current_position(1), reason...); | 1120 | 16 | } |
void toml::v3::impl::impl_ex::parser::set_error<std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> > >(std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const Line | Count | Source | 1118 | 66 | { | 1119 | 66 | set_error_at(current_position(1), reason...); | 1120 | 66 | } |
void toml::v3::impl::impl_ex::parser::set_error<std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> > >(std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const Line | Count | Source | 1118 | 78 | { | 1119 | 78 | set_error_at(current_position(1), reason...); | 1120 | 78 | } |
void toml::v3::impl::impl_ex::parser::set_error<std::__1::basic_string_view<char, std::__1::char_traits<char> >, unsigned long, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> > >(std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, unsigned long const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const Line | Count | Source | 1118 | 2 | { | 1119 | 2 | set_error_at(current_position(1), reason...); | 1120 | 2 | } |
void toml::v3::impl::impl_ex::parser::set_error<std::__1::basic_string_view<char, std::__1::char_traits<char> >, unsigned int>(std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, unsigned int const&) const Line | Count | Source | 1118 | 24 | { | 1119 | 24 | set_error_at(current_position(1), reason...); | 1120 | 24 | } |
void toml::v3::impl::impl_ex::parser::set_error<std::__1::basic_string_view<char, std::__1::char_traits<char> >, unsigned long>(std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, unsigned long const&) const Line | Count | Source | 1118 | 3 | { | 1119 | 3 | set_error_at(current_position(1), reason...); | 1120 | 3 | } |
void toml::v3::impl::impl_ex::parser::set_error<std::__1::basic_string_view<char, std::__1::char_traits<char> >, unsigned int, std::__1::basic_string_view<char, std::__1::char_traits<char> >, unsigned int>(std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, unsigned int const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, unsigned int const&) const Line | Count | Source | 1118 | 8 | { | 1119 | 8 | set_error_at(current_position(1), reason...); | 1120 | 8 | } |
void toml::v3::impl::impl_ex::parser::set_error<std::__1::basic_string_view<char, std::__1::char_traits<char> >, int>(std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, int const&) const Line | Count | Source | 1118 | 10 | { | 1119 | 10 | set_error_at(current_position(1), reason...); | 1120 | 10 | } |
void toml::v3::impl::impl_ex::parser::set_error<std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> > >(std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const Line | Count | Source | 1118 | 1.61k | { | 1119 | 1.61k | set_error_at(current_position(1), reason...); | 1120 | 1.61k | } |
|
1121 | | |
1122 | | void go_back(size_t count = 1) noexcept |
1123 | 845k | { |
1124 | 845k | return_if_error(); |
1125 | 845k | TOML_ASSERT_ASSUME(count); |
1126 | | |
1127 | 845k | cp = reader.step_back(count); |
1128 | 845k | prev_pos = cp->position; |
1129 | 845k | } |
1130 | | |
1131 | | void advance() |
1132 | 47.2M | { |
1133 | 47.2M | return_if_error(); |
1134 | 47.2M | assert_not_eof(); |
1135 | | |
1136 | 47.2M | prev_pos = cp->position; |
1137 | 47.2M | cp = reader.read_next(); |
1138 | | |
1139 | | #if !TOML_EXCEPTIONS |
1140 | | if (reader.error()) |
1141 | | { |
1142 | | err = std::move(reader.error()); |
1143 | | return; |
1144 | | } |
1145 | | #endif |
1146 | | |
1147 | 47.2M | if (recording && !is_eof()) |
1148 | 32.4M | { |
1149 | 32.4M | if (recording_whitespace || !is_whitespace(*cp)) |
1150 | 32.4M | recording_buffer.append(cp->bytes, cp->count); |
1151 | 32.4M | } |
1152 | 47.2M | } |
1153 | | |
1154 | | void start_recording(bool include_current = true) noexcept |
1155 | 211k | { |
1156 | 211k | return_if_error(); |
1157 | | |
1158 | 211k | recording = true; |
1159 | 211k | recording_whitespace = true; |
1160 | 211k | recording_buffer.clear(); |
1161 | 211k | if (include_current && !is_eof()) |
1162 | 211k | recording_buffer.append(cp->bytes, cp->count); |
1163 | 211k | } |
1164 | | |
1165 | | void stop_recording(size_t pop_bytes = 0) noexcept |
1166 | 210k | { |
1167 | 210k | return_if_error(); |
1168 | | |
1169 | 210k | recording = false; |
1170 | 210k | if (pop_bytes) |
1171 | 207k | { |
1172 | 207k | if (pop_bytes >= recording_buffer.length()) |
1173 | 64 | recording_buffer.clear(); |
1174 | 207k | else if (pop_bytes == 1u) |
1175 | 207k | recording_buffer.pop_back(); |
1176 | 0 | else |
1177 | 0 | recording_buffer.erase(recording_buffer.begin() |
1178 | 0 | + static_cast<ptrdiff_t>(recording_buffer.length() - pop_bytes), |
1179 | 0 | recording_buffer.end()); |
1180 | 207k | } |
1181 | 210k | } |
1182 | | |
1183 | | bool consume_leading_whitespace() |
1184 | 4.78M | { |
1185 | 4.78M | return_if_error_or_eof({}); |
1186 | | |
1187 | 4.77M | bool consumed = false; |
1188 | 4.83M | while (!is_eof() && is_horizontal_whitespace(*cp)) |
1189 | 59.7k | { |
1190 | 59.7k | if TOML_UNLIKELY(!is_ascii_horizontal_whitespace(*cp)) |
1191 | 59.7k | set_error_and_return_default("expected space or tab, saw '"sv, escaped_codepoint{ *cp }, "'"sv); |
1192 | | |
1193 | 59.7k | consumed = true; |
1194 | 59.7k | advance_and_return_if_error({}); |
1195 | 59.7k | } |
1196 | 4.77M | return consumed; |
1197 | 4.77M | } |
1198 | | |
1199 | | bool consume_line_break() |
1200 | 4.21M | { |
1201 | 4.21M | return_if_error_or_eof({}); |
1202 | | |
1203 | 4.21M | if TOML_UNLIKELY(is_match(*cp, U'\v', U'\f')) |
1204 | 4.21M | set_error_and_return_default( |
1205 | 4.21M | R"(vertical tabs '\v' and form-feeds '\f' are not legal line breaks in TOML)"sv); |
1206 | | |
1207 | 4.21M | if (*cp == U'\r') |
1208 | 676 | { |
1209 | 676 | advance_and_return_if_error({}); // skip \r |
1210 | | |
1211 | 676 | if TOML_UNLIKELY(is_eof()) |
1212 | 676 | set_error_and_return_default("expected '\\n' after '\\r', saw EOF"sv); |
1213 | | |
1214 | 672 | if TOML_UNLIKELY(*cp != U'\n') |
1215 | 672 | set_error_and_return_default("expected '\\n' after '\\r', saw '"sv, |
1216 | 672 | escaped_codepoint{ *cp }, |
1217 | 672 | "'"sv); |
1218 | 672 | } |
1219 | 4.21M | else if (*cp != U'\n') |
1220 | 2.07M | return false; |
1221 | | |
1222 | 2.14M | advance_and_return_if_error({}); // skip \n |
1223 | 2.14M | return true; |
1224 | 4.21M | } |
1225 | | |
1226 | | bool consume_rest_of_line() |
1227 | 0 | { |
1228 | 0 | return_if_error_or_eof({}); |
1229 | 0 |
|
1230 | 0 | do |
1231 | 0 | { |
1232 | 0 | if (is_ascii_vertical_whitespace(*cp)) |
1233 | 0 | return consume_line_break(); |
1234 | 0 | else |
1235 | 0 | advance(); |
1236 | 0 | return_if_error({}); |
1237 | 0 | } |
1238 | 0 | while (!is_eof()); |
1239 | 0 |
|
1240 | 0 | return true; |
1241 | 0 | } |
1242 | | |
1243 | | bool consume_comment() |
1244 | 2.04M | { |
1245 | 2.04M | return_if_error_or_eof({}); |
1246 | | |
1247 | 2.04M | if (*cp != U'#') |
1248 | 2.04M | return false; |
1249 | | |
1250 | 3.76k | push_parse_scope("comment"sv); |
1251 | | |
1252 | 3.76k | advance_and_return_if_error({}); // skip the '#' |
1253 | | |
1254 | 218k | while (!is_eof()) |
1255 | 218k | { |
1256 | 218k | if (consume_line_break()) |
1257 | 3.52k | return true; |
1258 | 214k | return_if_error({}); |
1259 | | |
1260 | 214k | #if TOML_LANG_AT_LEAST(1, 0, 0) |
1261 | | |
1262 | | // toml/issues/567 (disallow non-TAB control characters in comments) |
1263 | 214k | if TOML_UNLIKELY(is_nontab_control_character(*cp)) |
1264 | 214k | set_error_and_return_default( |
1265 | 214k | "control characters other than TAB (U+0009) are explicitly prohibited in comments"sv); |
1266 | | |
1267 | | // toml/pull/720 (disallow surrogates in comments) |
1268 | 214k | else if TOML_UNLIKELY(is_unicode_surrogate(*cp)) |
1269 | 0 | set_error_and_return_default( |
1270 | 214k | "unicode surrogates (U+D800 to U+DFFF) are explicitly prohibited in comments"sv); |
1271 | 214k | #endif |
1272 | | |
1273 | 214k | advance_and_return_if_error({}); |
1274 | 214k | } |
1275 | | |
1276 | 235 | return true; |
1277 | 3.76k | } |
1278 | | |
1279 | | TOML_NODISCARD |
1280 | | bool consume_expected_sequence(std::u32string_view seq) |
1281 | 3.46k | { |
1282 | 3.46k | return_if_error({}); |
1283 | 3.46k | TOML_ASSERT(!seq.empty()); |
1284 | | |
1285 | 3.46k | for (auto c : seq) |
1286 | 12.9k | { |
1287 | 12.9k | set_error_and_return_if_eof({}); |
1288 | 12.9k | if (*cp != c) |
1289 | 50 | return false; |
1290 | 12.8k | advance_and_return_if_error({}); |
1291 | 12.8k | } |
1292 | 3.37k | return true; |
1293 | 3.46k | } |
1294 | | |
1295 | | template <typename T> |
1296 | | TOML_NODISCARD |
1297 | | bool consume_digit_sequence(T* digits, size_t len) |
1298 | 50.0k | { |
1299 | 50.0k | return_if_error({}); |
1300 | 50.0k | TOML_ASSERT_ASSUME(digits); |
1301 | 50.0k | TOML_ASSERT_ASSUME(len); |
1302 | | |
1303 | 165k | for (size_t i = 0; i < len; i++) |
1304 | 116k | { |
1305 | 116k | set_error_and_return_if_eof({}); |
1306 | 116k | if (!is_decimal_digit(*cp)) |
1307 | 294 | return false; |
1308 | | |
1309 | 115k | digits[i] = static_cast<T>(*cp - U'0'); |
1310 | 115k | advance_and_return_if_error({}); |
1311 | 115k | } |
1312 | 49.7k | return true; |
1313 | 50.0k | } bool toml::v3::impl::impl_ex::parser::consume_digit_sequence<unsigned int>(unsigned int*, unsigned long) Line | Count | Source | 1298 | 46.0k | { | 1299 | 46.0k | return_if_error({}); | 1300 | 46.0k | TOML_ASSERT_ASSUME(digits); | 1301 | 46.0k | TOML_ASSERT_ASSUME(len); | 1302 | | | 1303 | 153k | for (size_t i = 0; i < len; i++) | 1304 | 108k | { | 1305 | 108k | set_error_and_return_if_eof({}); | 1306 | 108k | if (!is_decimal_digit(*cp)) | 1307 | 258 | return false; | 1308 | | | 1309 | 107k | digits[i] = static_cast<T>(*cp - U'0'); | 1310 | 107k | advance_and_return_if_error({}); | 1311 | 107k | } | 1312 | 45.7k | return true; | 1313 | 46.0k | } |
bool toml::v3::impl::impl_ex::parser::consume_digit_sequence<int>(int*, unsigned long) Line | Count | Source | 1298 | 4.01k | { | 1299 | 4.01k | return_if_error({}); | 1300 | 4.01k | TOML_ASSERT_ASSUME(digits); | 1301 | 4.01k | TOML_ASSERT_ASSUME(len); | 1302 | | | 1303 | 11.9k | for (size_t i = 0; i < len; i++) | 1304 | 8.01k | { | 1305 | 8.01k | set_error_and_return_if_eof({}); | 1306 | 8.01k | if (!is_decimal_digit(*cp)) | 1307 | 36 | return false; | 1308 | | | 1309 | 7.98k | digits[i] = static_cast<T>(*cp - U'0'); | 1310 | 7.98k | advance_and_return_if_error({}); | 1311 | 7.98k | } | 1312 | 3.98k | return true; | 1313 | 4.01k | } |
|
1314 | | |
1315 | | template <typename T> |
1316 | | TOML_NODISCARD |
1317 | | size_t consume_variable_length_digit_sequence(T* buffer, size_t max_len) |
1318 | 3.60k | { |
1319 | 3.60k | return_if_error({}); |
1320 | 3.60k | TOML_ASSERT_ASSUME(buffer); |
1321 | 3.60k | TOML_ASSERT_ASSUME(max_len); |
1322 | | |
1323 | 3.60k | size_t i = {}; |
1324 | 43.9k | for (; i < max_len; i++) |
1325 | 43.4k | { |
1326 | 43.4k | if (is_eof() || !is_decimal_digit(*cp)) |
1327 | 3.14k | break; |
1328 | | |
1329 | 40.3k | buffer[i] = static_cast<T>(*cp - U'0'); |
1330 | 40.3k | advance_and_return_if_error({}); |
1331 | 40.3k | } |
1332 | 3.60k | return i; |
1333 | 3.60k | } |
1334 | | |
// Parses a basic (double-quoted) string and returns a view of its decoded contents.
// The current codepoint must be a double quote; it is skipped first.
// multi_line: when true the multi-line rules apply: one line break directly after
//             the opening delimiter is dropped, a line-ending backslash swallows the
//             whitespace that follows it, line breaks are stored as LF, and the string
//             ends at three consecutive quotes (optionally preceded by one or two
//             quotes that belong to the content).
// Returns a view of string_buffer, which is cleared on entry here and by the other
// string/key parsers in this class, so the view is only good until the next such call.
// Errors: unknown or unsupported escapes, malformed unicode escapes, surrogates,
// scalar values above U+10FFFF, unescaped control characters other than TAB, and EOF
// before the closing delimiter.
1335 | | TOML_NODISCARD |
1336 | | TOML_NEVER_INLINE |
1337 | | std::string_view parse_basic_string(bool multi_line) |
1338 | 4.96k | { |
1339 | 4.96k | return_if_error({}); |
1340 | 4.96k | assert_not_eof(); |
1341 | 4.96k | TOML_ASSERT_ASSUME(*cp == U'"'); |
1342 | 4.96k | push_parse_scope("string"sv); |
1343 | | |
1344 | | // skip the '"' |
1345 | 4.96k | advance_and_return_if_error_or_eof({}); |
1346 | | |
1347 | | // multi-line strings ignore a single line ending right at the beginning |
1348 | 4.96k | if (multi_line) |
1349 | 1.88k | { |
1350 | 1.88k | consume_line_break(); |
1351 | 1.88k | return_if_error({}); |
1352 | 1.88k | set_error_and_return_if_eof({}); |
1353 | 1.88k | } |
1354 | | |
              // escaped: the previous codepoint was a backslash, so the current one selects the escape.
              // skipping_whitespace: set after a line-ending backslash; while set, line breaks and
              // horizontal whitespace are not appended to the result.
1355 | 4.96k | auto& str = string_buffer; |
1356 | 4.96k | str.clear(); |
1357 | 4.96k | bool escaped = false; |
1358 | 4.96k | bool skipping_whitespace = false; |
1359 | 4.96k | do |
1360 | 7.92M | { |
1361 | 7.92M | if (escaped) |
1362 | 5.58k | { |
1363 | 5.58k | escaped = false; |
1364 | | |
1365 | | // handle 'line ending slashes' in multi-line mode |
1366 | 5.58k | if (multi_line && is_whitespace(*cp)) |
1367 | 2.00k | { |
1368 | 2.00k | consume_leading_whitespace(); |
1369 | | |
1370 | 2.00k | if TOML_UNLIKELY(!consume_line_break()) |
1371 | 2.00k | set_error_and_return_default( |
1372 | 1.98k | "line-ending backslashes must be the last non-whitespace character on the line"sv); |
1373 | | |
1374 | 1.98k | skipping_whitespace = true; |
1375 | 1.98k | return_if_error({}); |
1376 | 1.98k | continue; |
1377 | 2.00k | } |
1378 | | |
              // skip_escaped_codepoint stays true for single-character escapes, whose escape
              // character still has to be stepped over after the switch; the unicode branch
              // advances by itself and clears it.
1379 | 3.58k | bool skip_escaped_codepoint = true; |
1380 | 3.58k | assert_not_eof(); |
1381 | 3.58k | switch (const auto escaped_codepoint = *cp) |
1382 | 3.58k | { |
1383 | | // 'regular' escape codes |
1384 | 211 | case U'b': str += '\b'; break; |
1385 | 289 | case U'f': str += '\f'; break; |
1386 | 328 | case U'n': str += '\n'; break; |
1387 | 196 | case U'r': str += '\r'; break; |
1388 | 369 | case U't': str += '\t'; break; |
1389 | 299 | case U'"': str += '"'; break; |
1390 | 400 | case U'\\': str += '\\'; break; |
1391 | | |
1392 | | #if TOML_LANG_UNRELEASED // toml/pull/790 (\e shorthand for \x1B) |
1393 | | case U'e': str += '\x1B'; break; |
1394 | | #else |
1395 | 1 | case U'e': |
1396 | 1 | set_error_and_return_default( |
1397 | 0 | "escape sequence '\\e' is not supported in TOML 1.0.0 and earlier"sv); |
1398 | 0 | #endif |
1399 | | |
1400 | | #if TOML_LANG_UNRELEASED // toml/pull/796 (\xHH unicode scalar sequences) |
1401 | | case U'x': [[fallthrough]]; |
1402 | | #else |
1403 | 2 | case U'x': |
1404 | 2 | set_error_and_return_default( |
1405 | 0 | "escape sequence '\\x' is not supported in TOML 1.0.0 and earlier"sv); |
1406 | 0 | #endif |
1407 | | |
1408 | | // unicode scalar sequences |
1409 | 1.09k | case U'u': [[fallthrough]]; |
1410 | 1.42k | case U'U': |
1411 | 1.42k | { |
1412 | 1.42k | push_parse_scope("unicode scalar sequence"sv); |
1413 | 1.42k | advance_and_return_if_error_or_eof({}); |
1414 | 1.41k | skip_escaped_codepoint = false; |
1415 | | |
              // place_value is the weight of the most significant hex digit still expected:
              // eight digits for U, four for u, two otherwise. It is divided by 16 per digit
              // and the loop ends when it reaches zero.
1416 | 1.41k | uint32_t place_value = |
1417 | 1.41k | escaped_codepoint == U'U' ? 0x10000000u : (escaped_codepoint == U'u' ? 0x1000u : 0x10u); |
1418 | 1.41k | uint32_t sequence_value{}; |
1419 | 8.06k | while (place_value) |
1420 | 6.70k | { |
1421 | 6.70k | set_error_and_return_if_eof({}); |
1422 | | |
1423 | 6.69k | if TOML_UNLIKELY(!is_hexadecimal_digit(*cp)) |
1424 | 6.69k | set_error_and_return_default("expected hex digit, saw '"sv, to_sv(*cp), "'"sv); |
1425 | | |
1426 | 6.65k | sequence_value += place_value * hex_to_dec(*cp); |
1427 | 6.65k | place_value /= 16u; |
1428 | 6.65k | advance_and_return_if_error({}); |
1429 | 6.65k | } |
1430 | | |
1431 | 1.35k | if TOML_UNLIKELY(is_unicode_surrogate(sequence_value)) |
1432 | 1.35k | set_error_and_return_default( |
1433 | 1.35k | "unicode surrogates (U+D800 - U+DFFF) are explicitly prohibited"sv); |
1434 | 1.35k | else if TOML_UNLIKELY(sequence_value > 0x10FFFFu) |
1435 | 24 | set_error_and_return_default("values greater than U+10FFFF are invalid"sv); |
1436 | | |
              // append the scalar value as a one to four byte UTF-8 sequence, chosen by magnitude
1437 | 1.32k | if (sequence_value < 0x80) |
1438 | 232 | { |
1439 | 232 | str += static_cast<char>(sequence_value); |
1440 | 232 | } |
1441 | 1.09k | else if (sequence_value < 0x800u) |
1442 | 356 | { |
1443 | 356 | str += static_cast<char>((sequence_value >> 6) | 0xC0u); |
1444 | 356 | str += static_cast<char>((sequence_value & 0x3Fu) | 0x80u); |
1445 | 356 | } |
1446 | 741 | else if (sequence_value < 0x10000u) |
1447 | 491 | { |
1448 | 491 | str += static_cast<char>((sequence_value >> 12) | 0xE0u); |
1449 | 491 | str += static_cast<char>(((sequence_value >> 6) & 0x3Fu) | 0x80u); |
1450 | 491 | str += static_cast<char>((sequence_value & 0x3Fu) | 0x80u); |
1451 | 491 | } |
1452 | 250 | else if (sequence_value < 0x110000u) |
1453 | 248 | { |
1454 | 248 | str += static_cast<char>((sequence_value >> 18) | 0xF0u); |
1455 | 248 | str += static_cast<char>(((sequence_value >> 12) & 0x3Fu) | 0x80u); |
1456 | 248 | str += static_cast<char>(((sequence_value >> 6) & 0x3Fu) | 0x80u); |
1457 | 248 | str += static_cast<char>((sequence_value & 0x3Fu) | 0x80u); |
1458 | 248 | } |
1459 | 1.32k | break; |
1460 | 1.35k | } |
1461 | | |
1462 | | // ??? |
1463 | 0 | TOML_UNLIKELY_CASE |
1464 | 73 | default: set_error_and_return_default("unknown escape sequence '\\"sv, to_sv(*cp), "'"sv); |
1465 | 3.58k | } |
1466 | | |
1467 | 3.41k | if (skip_escaped_codepoint) |
1468 | 2.09k | advance_and_return_if_error_or_eof({}); |
1469 | 3.41k | } |
1470 | 7.92M | else |
1471 | 7.92M | { |
1472 | | // handle closing delimiters |
1473 | 7.92M | if (*cp == U'"') |
1474 | 5.99k | { |
1475 | 5.99k | if (multi_line) |
1476 | 3.15k | { |
              // look ahead over at most four more codepoints and count the run of quotes
              // (one to five including the current one); the count decides below whether the
              // quotes are content, the closing delimiter, or both
1477 | 3.15k | size_t lookaheads = {}; |
1478 | 3.15k | size_t consecutive_delimiters = 1; |
1479 | 3.15k | do |
1480 | 7.57k | { |
1481 | 7.57k | advance_and_return_if_error({}); |
1482 | 7.57k | lookaheads++; |
1483 | 7.57k | if (!is_eof() && *cp == U'"') |
1484 | 4.81k | consecutive_delimiters++; |
1485 | 2.75k | else |
1486 | 2.75k | break; |
1487 | 7.57k | } |
1488 | 4.81k | while (lookaheads < 4u); |
1489 | | |
1490 | 3.15k | switch (consecutive_delimiters) |
1491 | 3.15k | { |
1492 | | // """ " (one quote somewhere in a ML string) |
1493 | 1.19k | case 1: |
1494 | 1.19k | str += '"'; |
1495 | 1.19k | skipping_whitespace = false; |
1496 | 1.19k | continue; |
1497 | | |
1498 | | // """ "" (two quotes somewhere in a ML string) |
1499 | 404 | case 2: |
1500 | 404 | str.append("\"\""sv); |
1501 | 404 | skipping_whitespace = false; |
1502 | 404 | continue; |
1503 | | |
1504 | | // """ """ (the end of the string) |
1505 | 653 | case 3: return str; |
1506 | | |
1507 | | // """ """" (one at the end of the string) |
1508 | 503 | case 4: str += '"'; return str; |
1509 | | |
1510 | | // """ """"" (two quotes at the end of the string) |
1511 | 399 | case 5: |
1512 | 399 | str.append("\"\""sv); |
1513 | 399 | advance_and_return_if_error({}); // skip the last '"' |
1514 | 399 | return str; |
1515 | | |
1516 | 0 | default: TOML_UNREACHABLE; |
1517 | 3.15k | } |
1518 | 3.15k | } |
1519 | 2.83k | else |
1520 | 2.83k | { |
1521 | 2.83k | advance_and_return_if_error({}); // skip the closing delimiter |
1522 | 2.83k | return str; |
1523 | 2.83k | } |
1524 | 5.99k | } |
1525 | | |
1526 | | // handle escapes |
1527 | 7.91M | else if (*cp == U'\\') |
1528 | 5.60k | { |
1529 | 5.60k | advance_and_return_if_error_or_eof({}); // skip the '\' |
1530 | 5.58k | skipping_whitespace = false; |
1531 | 5.58k | escaped = true; |
1532 | 5.58k | continue; |
1533 | 5.60k | } |
1534 | | |
1535 | | // handle line endings in multi-line mode |
1536 | 7.90M | if (multi_line && is_ascii_vertical_whitespace(*cp)) |
1537 | 1.00M | { |
1538 | 1.00M | consume_line_break(); |
1539 | 1.00M | return_if_error({}); |
1540 | 1.00M | if (!skipping_whitespace) |
1541 | 993k | str += '\n'; |
1542 | 1.00M | continue; |
1543 | 1.00M | } |
1544 | | |
1545 | | // handle control characters |
1546 | 6.90M | if TOML_UNLIKELY(is_nontab_control_character(*cp)) |
1547 | 6.90M | set_error_and_return_default( |
1548 | 6.90M | "unescaped control characters other than TAB (U+0009) are explicitly prohibited"sv); |
1549 | | |
1550 | 6.90M | #if TOML_LANG_AT_LEAST(1, 0, 0) |
1551 | | |
1552 | | // handle surrogates in strings |
1553 | 6.90M | if TOML_UNLIKELY(is_unicode_surrogate(*cp)) |
1554 | 6.90M | set_error_and_return_default( |
1555 | 6.90M | "unescaped unicode surrogates (U+D800 to U+DFFF) are explicitly prohibited"sv); |
1556 | 6.90M | #endif |
1557 | | |
              // ordinary content: copy the codepoint's raw bytes, except horizontal
              // whitespace that is being skipped after a line-ending backslash
1558 | 6.90M | if (multi_line) |
1559 | 532k | { |
1560 | 532k | if (!skipping_whitespace || !is_horizontal_whitespace(*cp)) |
1561 | 529k | { |
1562 | 529k | skipping_whitespace = false; |
1563 | 529k | str.append(cp->bytes, cp->count); |
1564 | 529k | } |
1565 | 532k | } |
1566 | 6.37M | else |
1567 | 6.37M | str.append(cp->bytes, cp->count); |
1568 | | |
1569 | 6.90M | advance_and_return_if_error({}); |
1570 | 6.90M | } |
1571 | 7.92M | } |
1572 | 7.92M | while (!is_eof()); |
1573 | | |
              // the loop only exits here when EOF arrived before a closing delimiter
1574 | 320 | set_error_and_return_default("encountered end-of-file"sv); |
1575 | 320 | } |
1576 | | |
// Parses a literal (single-quoted) string and returns a view of its contents.
// No escape processing takes place: codepoint bytes are copied as they are.
// The current codepoint must be a single quote; it is skipped first.
// multi_line: when true one line break directly after the opening delimiter is
//             dropped, later line breaks are stored as LF, and the string ends at
//             three consecutive quotes (optionally preceded by one or two quotes that
//             belong to the content).
// Returns a view of string_buffer, which is cleared on entry here and by the other
// string/key parsers in this class, so the view is only good until the next such call.
// Errors: control characters other than TAB, unicode surrogates (TOML 1.0.0 and
// later), and EOF before the closing delimiter.
1577 | | TOML_NODISCARD |
1578 | | TOML_NEVER_INLINE |
1579 | | std::string_view parse_literal_string(bool multi_line) |
1580 | 6.39k | { |
1581 | 6.39k | return_if_error({}); |
1582 | 6.39k | assert_not_eof(); |
1583 | 6.39k | TOML_ASSERT_ASSUME(*cp == U'\''); |
1584 | 6.39k | push_parse_scope("literal string"sv); |
1585 | | |
1586 | | // skip the delimiter |
1587 | 6.39k | advance_and_return_if_error_or_eof({}); |
1588 | | |
1589 | | // multi-line strings ignore a single line ending right at the beginning |
1590 | 6.38k | if (multi_line) |
1591 | 3.65k | { |
1592 | 3.65k | consume_line_break(); |
1593 | 3.65k | return_if_error({}); |
1594 | 3.65k | set_error_and_return_if_eof({}); |
1595 | 3.65k | } |
1596 | | |
1597 | 6.38k | auto& str = string_buffer; |
1598 | 6.38k | str.clear(); |
1599 | 6.38k | do |
1600 | 23.6M | { |
1601 | 23.6M | return_if_error({}); |
1602 | | |
1603 | | // handle closing delimiters |
1604 | 23.6M | if (*cp == U'\'') |
1605 | 9.65k | { |
1606 | 9.65k | if (multi_line) |
1607 | 6.96k | { |
              // look ahead over at most four more codepoints and count the run of quotes
              // (one to five including the current one); the count decides below whether the
              // quotes are content, the closing delimiter, or both
1608 | 6.96k | size_t lookaheads = {}; |
1609 | 6.96k | size_t consecutive_delimiters = 1; |
1610 | 6.96k | do |
1611 | 17.4k | { |
1612 | 17.4k | advance_and_return_if_error({}); |
1613 | 17.4k | lookaheads++; |
1614 | 17.4k | if (!is_eof() && *cp == U'\'') |
1615 | 11.3k | consecutive_delimiters++; |
1616 | 6.09k | else |
1617 | 6.09k | break; |
1618 | 17.4k | } |
1619 | 11.3k | while (lookaheads < 4u); |
1620 | | |
1621 | 6.96k | switch (consecutive_delimiters) |
1622 | 6.96k | { |
1623 | | // ''' ' (one quote somewhere in a ML string) |
1624 | 2.60k | case 1: str += '\''; continue; |
1625 | | |
1626 | | // ''' '' (two quotes somewhere in a ML string) |
1627 | 779 | case 2: str.append("''"sv); continue; |
1628 | | |
1629 | | // ''' ''' (the end of the string) |
1630 | 1.05k | case 3: return str; |
1631 | | |
1632 | | // ''' '''' (one at the end of the string) |
1633 | 1.65k | case 4: str += '\''; return str; |
1634 | | |
1635 | | // ''' ''''' (two quotes at the end of the string) |
1636 | 876 | case 5: |
1637 | 876 | str.append("''"sv); |
1638 | 876 | advance_and_return_if_error({}); // skip the last ' |
1639 | 876 | return str; |
1640 | | |
1641 | 0 | default: TOML_UNREACHABLE; |
1642 | 6.96k | } |
1643 | 6.96k | } |
1644 | 2.68k | else |
1645 | 2.68k | { |
1646 | 2.68k | advance_and_return_if_error({}); // skip the closing delimiter |
1647 | 2.68k | return str; |
1648 | 2.68k | } |
1649 | 9.65k | } |
1650 | | |
1651 | | // handle line endings in multi-line mode |
1652 | 23.6M | if (multi_line && is_ascii_vertical_whitespace(*cp)) |
1653 | 912k | { |
1654 | 912k | consume_line_break(); |
1655 | 912k | return_if_error({}); |
1656 | 912k | str += '\n'; |
1657 | 912k | continue; |
1658 | 912k | } |
1659 | | |
1660 | | // handle control characters |
1661 | 22.7M | if TOML_UNLIKELY(is_nontab_control_character(*cp)) |
1662 | 22.7M | set_error_and_return_default( |
1663 | 22.7M | "control characters other than TAB (U+0009) are explicitly prohibited"sv); |
1664 | | |
1665 | 22.7M | #if TOML_LANG_AT_LEAST(1, 0, 0) |
1666 | | |
1667 | | // handle surrogates in strings |
1668 | 22.7M | if TOML_UNLIKELY(is_unicode_surrogate(*cp)) |
1669 | 22.7M | set_error_and_return_default("unicode surrogates (U+D800 - U+DFFF) are explicitly prohibited"sv); |
1670 | 22.7M | #endif |
1671 | | |
              // ordinary content: copy the codepoint's raw bytes unchanged
1672 | 22.7M | str.append(cp->bytes, cp->count); |
1673 | 22.7M | advance_and_return_if_error({}); |
1674 | 22.7M | } |
1675 | 23.6M | while (!is_eof()); |
1676 | | |
              // the loop only exits here when EOF arrived before a closing delimiter
1677 | 106 | set_error_and_return_default("encountered end-of-file"sv); |
1678 | 106 | } |
1679 | | |
1680 | | TOML_NODISCARD |
1681 | | TOML_NEVER_INLINE |
1682 | | parsed_string parse_string() |
1683 | 11.4k | { |
1684 | 11.4k | return_if_error({}); |
1685 | 11.4k | assert_not_eof(); |
1686 | 11.4k | TOML_ASSERT_ASSUME(is_string_delimiter(*cp)); |
1687 | 11.4k | push_parse_scope("string"sv); |
1688 | | |
1689 | | // get the first three characters to determine the string type |
1690 | 11.4k | const auto first = cp->value; |
1691 | 11.4k | advance_and_return_if_error_or_eof({}); |
1692 | 11.4k | const auto second = cp->value; |
1693 | 11.4k | advance_and_return_if_error({}); |
1694 | 11.4k | const auto third = cp ? cp->value : U'\0'; |
1695 | | |
1696 | | // if we were eof at the third character then first and second need to be |
1697 | | // the same string character (otherwise it's an unterminated string) |
1698 | 11.4k | if (is_eof()) |
1699 | 65 | { |
1700 | 65 | if (second == first) |
1701 | 38 | return {}; |
1702 | | |
1703 | 27 | set_error_and_return_default("encountered end-of-file"sv); |
1704 | 27 | } |
1705 | | |
1706 | | // if the first three characters are all the same string delimiter then |
1707 | | // it's a multi-line string. |
1708 | 11.3k | else if (first == second && first == third) |
1709 | 5.54k | { |
1710 | 5.54k | return { first == U'\'' ? parse_literal_string(true) : parse_basic_string(true), true }; |
1711 | 5.54k | } |
1712 | | |
1713 | | // otherwise it's just a regular string. |
1714 | 5.81k | else |
1715 | 5.81k | { |
1716 | | // step back two characters so that the current |
1717 | | // character is the string delimiter |
1718 | 5.81k | go_back(2u); |
1719 | | |
1720 | 5.81k | return { first == U'\'' ? parse_literal_string(false) : parse_basic_string(false), false }; |
1721 | 5.81k | } |
1722 | 11.4k | } |
1723 | | |
1724 | | TOML_NODISCARD |
1725 | | TOML_NEVER_INLINE |
1726 | | std::string_view parse_bare_key_segment() |
1727 | 1.15M | { |
1728 | 1.15M | return_if_error({}); |
1729 | 1.15M | assert_not_eof(); |
1730 | 1.15M | TOML_ASSERT_ASSUME(is_bare_key_character(*cp)); |
1731 | | |
1732 | 1.15M | string_buffer.clear(); |
1733 | | |
1734 | 11.4M | while (!is_eof()) |
1735 | 11.4M | { |
1736 | 11.4M | if (!is_bare_key_character(*cp)) |
1737 | 1.15M | break; |
1738 | | |
1739 | 10.2M | string_buffer.append(cp->bytes, cp->count); |
1740 | 10.2M | advance_and_return_if_error({}); |
1741 | 10.2M | } |
1742 | | |
1743 | 1.15M | return string_buffer; |
1744 | 1.15M | } |
1745 | | |
1746 | | TOML_NODISCARD |
1747 | | TOML_NEVER_INLINE |
1748 | | bool parse_boolean() |
1749 | 1.81k | { |
1750 | 1.81k | return_if_error({}); |
1751 | 1.81k | assert_not_eof(); |
1752 | 1.81k | TOML_ASSERT_ASSUME(is_match(*cp, U't', U'f', U'T', U'F')); |
1753 | 1.81k | push_parse_scope("boolean"sv); |
1754 | | |
1755 | 1.81k | start_recording(true); |
1756 | 1.81k | auto result = is_match(*cp, U't', U'T'); |
1757 | 1.81k | if (!consume_expected_sequence(result ? U"true"sv : U"false"sv)) |
1758 | 1.81k | set_error_and_return_default("expected '"sv, |
1759 | 1.78k | to_sv(result), |
1760 | 1.78k | "', saw '"sv, |
1761 | 1.78k | to_sv(recording_buffer), |
1762 | 1.78k | "'"sv); |
1763 | 1.78k | stop_recording(); |
1764 | | |
1765 | 1.78k | if (cp && !is_value_terminator(*cp)) |
1766 | 1.78k | set_error_and_return_default("expected value-terminator, saw '"sv, to_sv(*cp), "'"sv); |
1767 | | |
1768 | 1.77k | return result; |
1769 | 1.78k | } |
1770 | | |
1771 | | TOML_NODISCARD |
1772 | | TOML_NEVER_INLINE |
1773 | | double parse_inf_or_nan() |
1774 | 1.65k | { |
1775 | 1.65k | return_if_error({}); |
1776 | 1.65k | assert_not_eof(); |
1777 | 1.65k | TOML_ASSERT_ASSUME(is_match(*cp, U'i', U'n', U'I', U'N', U'+', U'-')); |
1778 | 1.65k | push_parse_scope("floating-point"sv); |
1779 | | |
1780 | 1.65k | start_recording(true); |
1781 | 1.65k | const bool negative = *cp == U'-'; |
1782 | 1.65k | if (negative || *cp == U'+') |
1783 | 382 | advance_and_return_if_error_or_eof({}); |
1784 | | |
1785 | 1.65k | const bool inf = is_match(*cp, U'i', U'I'); |
1786 | 1.65k | if (!consume_expected_sequence(inf ? U"inf"sv : U"nan"sv)) |
1787 | 1.65k | set_error_and_return_default("expected '"sv, |
1788 | 1.63k | inf ? "inf"sv : "nan"sv, |
1789 | 1.63k | "', saw '"sv, |
1790 | 1.63k | to_sv(recording_buffer), |
1791 | 1.63k | "'"sv); |
1792 | 1.63k | stop_recording(); |
1793 | | |
1794 | 1.63k | if (cp && !is_value_terminator(*cp)) |
1795 | 1.63k | set_error_and_return_default("expected value-terminator, saw '"sv, to_sv(*cp), "'"sv); |
1796 | | |
1797 | 1.60k | return inf ? (negative ? -std::numeric_limits<double>::infinity() : std::numeric_limits<double>::infinity()) |
1798 | 1.60k | : std::numeric_limits<double>::quiet_NaN(); |
1799 | 1.63k | } |
1800 | | |
// Parses a decimal floating-point literal: an optional sign, digits that may be
// separated by underscores, an optional fractional part and an optional exponent.
// Characters are validated one at a time and copied (minus underscores and the
// leading sign) into a fixed-size buffer until EOF or a value terminator; the buffer
// is then converted with std::from_chars or, when TOML_FLOAT_CHARCONV is off, a
// std::stringstream using the classic locale. The leading sign is applied to the
// converted value by multiplication.
// Errors: misplaced underscores, a missing integer part, leading zeroes, repeated or
// misplaced '.', exponent markers or signs, any other unexpected character, a literal
// longer than the buffer, a literal ending in '_', 'e', 'E', '+', '-' or '.', and
// conversion failure.
1801 | | TOML_NODISCARD |
1802 | | TOML_NEVER_INLINE |
1803 | | double parse_float() |
1804 | 4.52k | { |
1805 | 4.52k | return_if_error({}); |
1806 | 4.52k | assert_not_eof(); |
1807 | 4.52k | TOML_ASSERT_ASSUME(is_match(*cp, U'+', U'-', U'.') || is_decimal_digit(*cp)); |
1808 | 4.52k | push_parse_scope("floating-point"sv); |
1809 | | |
1810 | | // sign |
1811 | 4.52k | const int sign = *cp == U'-' ? -1 : 1; |
1812 | 4.52k | if (is_match(*cp, U'+', U'-')) |
1813 | 1.35k | advance_and_return_if_error_or_eof({}); |
1814 | | |
1815 | | // consume value chars |
              // prev is the previously consumed codepoint (null before the first one);
              // NOTE(review): it keeps pointing at a codepoint after advance() has moved on,
              // which assumes the reader keeps earlier codepoints alive - TODO confirm.
              // first_integer_part remembers the first digit seen before any '.' or exponent.
1816 | 4.52k | char chars[utf8_buffered_reader::max_history_length]; |
1817 | 4.52k | size_t length = {}; |
1818 | 4.52k | const utf8_codepoint* prev = {}; |
1819 | 4.52k | bool seen_decimal = false, seen_exponent = false; |
1820 | 4.52k | char first_integer_part = '\0'; |
1821 | 40.5k | while (!is_eof() && !is_value_terminator(*cp)) |
1822 | 36.1k | { |
1823 | 36.1k | if (*cp == U'_') |
1824 | 867 | { |
1825 | 867 | if (!prev || !is_decimal_digit(*prev)) |
1826 | 867 | set_error_and_return_default("underscores may only follow digits"sv); |
1827 | | |
              // underscores are validated but never copied into chars
1828 | 862 | prev = cp; |
1829 | 862 | advance_and_return_if_error_or_eof({}); |
1830 | 861 | continue; |
1831 | 862 | } |
1832 | 35.2k | else if TOML_UNLIKELY(prev && *prev == U'_' && !is_decimal_digit(*cp)) |
1833 | 35.2k | set_error_and_return_default("underscores must be followed by digits"sv); |
1834 | 35.2k | else if TOML_UNLIKELY(length == sizeof(chars)) |
1835 | 35.2k | set_error_and_return_default("exceeds length limit of "sv, |
1836 | 35.2k | sizeof(chars), |
1837 | 35.2k | " digits"sv, |
1838 | 35.2k | (seen_exponent ? ""sv : " (consider using exponent notation)"sv)); |
1839 | 35.2k | else if (*cp == U'.') |
1840 | 3.20k | { |
1841 | | // .1 |
1842 | | // -.1 |
1843 | | // +.1 (no integer part) |
1844 | 3.20k | if (!first_integer_part) |
1845 | 3.20k | set_error_and_return_default("expected decimal digit, saw '.'"sv); |
1846 | | |
1847 | | // 1.0e+.10 (exponent cannot have '.') |
1848 | 3.18k | else if (seen_exponent) |
1849 | 3.18k | set_error_and_return_default("expected exponent decimal digit or sign, saw '.'"sv); |
1850 | | |
1851 | | // 1.0.e+.10 |
1852 | | // 1..0 |
1853 | | // (multiple '.') |
1854 | 3.18k | else if (seen_decimal) |
1855 | 4 | set_error_and_return_default("expected decimal digit or exponent, saw '.'"sv); |
1856 | | |
1857 | 3.18k | seen_decimal = true; |
1858 | 3.18k | } |
1859 | 32.0k | else if (is_match(*cp, U'e', U'E')) |
1860 | 1.31k | { |
1861 | 1.31k | if (prev && !is_decimal_digit(*prev)) |
1862 | 1.31k | set_error_and_return_default("expected decimal digit, saw '"sv, to_sv(*cp), "'"sv); |
1863 | | |
1864 | | // 1.0ee+10 (multiple 'e') |
1865 | 1.30k | else if (seen_exponent) |
1866 | 2 | set_error_and_return_default("expected decimal digit, saw '"sv, to_sv(*cp), "'"sv); |
1867 | | |
              // setting seen_decimal here also exempts exponent digits from the
              // leading-zero check in the digit branch below
1868 | 1.30k | seen_decimal = true; // implied |
1869 | 1.30k | seen_exponent = true; |
1870 | 1.30k | } |
1871 | 30.7k | else if (is_match(*cp, U'+', U'-')) |
1872 | 754 | { |
1873 | | // 1.-0 (sign in mantissa) |
1874 | 754 | if (!seen_exponent) |
1875 | 754 | set_error_and_return_default("expected decimal digit or '.', saw '"sv, to_sv(*cp), "'"sv); |
1876 | | |
1877 | | // 1.0e1-0 (misplaced exponent sign) |
1878 | 752 | else if (!is_match(*prev, U'e', U'E')) |
1879 | 7 | set_error_and_return_default("expected exponent digit, saw '"sv, to_sv(*cp), "'"sv); |
1880 | 754 | } |
1881 | 30.0k | else if (is_decimal_digit(*cp)) |
1882 | 29.9k | { |
1883 | 29.9k | if (!seen_decimal) |
1884 | 17.0k | { |
1885 | 17.0k | if (!first_integer_part) |
1886 | 4.49k | first_integer_part = static_cast<char>(cp->bytes[0]); |
1887 | 12.5k | else if (first_integer_part == '0') |
1888 | 1 | set_error_and_return_default("leading zeroes are prohibited"sv); |
1889 | 17.0k | } |
1890 | 29.9k | } |
1891 | 50 | else |
1892 | 50 | set_error_and_return_default("expected decimal digit, saw '"sv, to_sv(*cp), "'"sv); |
1893 | | |
              // every accepted character is a single byte, so its first byte is the character
1894 | 35.1k | chars[length++] = static_cast<char>(cp->bytes[0]); |
1895 | 35.1k | prev = cp; |
1896 | 35.1k | advance_and_return_if_error({}); |
1897 | 35.1k | } |
1898 | | |
1899 | | // sanity-check ending state |
              // a literal cut short by EOF is reported via set_error_and_return_if_eof;
              // otherwise the message names the terminator that arrived too early
1900 | 4.41k | if (prev) |
1901 | 4.41k | { |
1902 | 4.41k | if (*prev == U'_') |
1903 | 1 | { |
1904 | 1 | set_error_and_return_if_eof({}); |
1905 | 1 | set_error_and_return_default("underscores must be followed by digits"sv); |
1906 | 1 | } |
1907 | 4.41k | else if (is_match(*prev, U'e', U'E', U'+', U'-', U'.')) |
1908 | 49 | { |
1909 | 49 | set_error_and_return_if_eof({}); |
1910 | 17 | set_error_and_return_default("expected decimal digit, saw '"sv, to_sv(*cp), "'"sv); |
1911 | 17 | } |
1912 | 4.41k | } |
1913 | | |
1914 | | // convert to double |
1915 | 4.36k | double result; |
1916 | | #if TOML_FLOAT_CHARCONV |
1917 | | { |
1918 | | auto fc_result = std::from_chars(chars, chars + length, result); |
1919 | | switch (fc_result.ec) |
1920 | | { |
1921 | | TOML_LIKELY_CASE |
1922 | | case std::errc{}: // ok |
1923 | | return result * sign; |
1924 | | |
1925 | | case std::errc::invalid_argument: |
1926 | | set_error_and_return_default("'"sv, |
1927 | | std::string_view{ chars, length }, |
1928 | | "' could not be interpreted as a value"sv); |
1929 | | break; |
1930 | | |
1931 | | case std::errc::result_out_of_range: |
1932 | | set_error_and_return_default("'"sv, |
1933 | | std::string_view{ chars, length }, |
1934 | | "' is not representable in 64 bits"sv); |
1935 | | break; |
1936 | | |
1937 | | default: //?? |
1938 | | set_error_and_return_default("an unspecified error occurred while trying to interpret '"sv, |
1939 | | std::string_view{ chars, length }, |
1940 | | "' as a value"sv); |
1941 | | } |
1942 | | } |
1943 | | #else |
1944 | 4.36k | { |
              // stream fallback: the classic locale keeps parsing independent of the global locale
1945 | 4.36k | std::stringstream ss; |
1946 | 4.36k | ss.imbue(std::locale::classic()); |
1947 | 4.36k | ss.write(chars, static_cast<std::streamsize>(length)); |
1948 | 4.36k | if ((ss >> result)) |
1949 | 4.35k | return result * sign; |
1950 | 7 | else |
1951 | 4.36k | set_error_and_return_default("'"sv, |
1952 | 4.36k | std::string_view{ chars, length }, |
1953 | 4.36k | "' could not be interpreted as a value"sv); |
1954 | 4.36k | } |
1955 | 4.36k | #endif |
1956 | 4.36k | } |
1957 | | |
1958 | | TOML_NODISCARD |
1959 | | TOML_NEVER_INLINE |
1960 | | double parse_hex_float() |
1961 | 9 | { |
1962 | 9 | return_if_error({}); |
1963 | 9 | assert_not_eof(); |
1964 | 9 | TOML_ASSERT_ASSUME(is_match(*cp, U'0', U'+', U'-')); |
1965 | 9 | push_parse_scope("hexadecimal floating-point"sv); |
1966 | | |
1967 | | #if TOML_LANG_UNRELEASED // toml/issues/562 (hexfloats) |
1968 | | |
1969 | | // sign |
1970 | | const int sign = *cp == U'-' ? -1 : 1; |
1971 | | if (is_match(*cp, U'+', U'-')) |
1972 | | advance_and_return_if_error_or_eof({}); |
1973 | | |
1974 | | // '0' |
1975 | | if (*cp != U'0') |
1976 | | set_error_and_return_default(" expected '0', saw '"sv, to_sv(*cp), "'"sv); |
1977 | | advance_and_return_if_error_or_eof({}); |
1978 | | |
1979 | | // 'x' or 'X' |
1980 | | if (!is_match(*cp, U'x', U'X')) |
1981 | | set_error_and_return_default("expected 'x' or 'X', saw '"sv, to_sv(*cp), "'"sv); |
1982 | | advance_and_return_if_error_or_eof({}); |
1983 | | |
1984 | | // <HEX DIGITS> ([.]<HEX DIGITS>)? [pP] [+-]? <DEC DIGITS> |
1985 | | |
1986 | | // consume value fragments |
1987 | | struct fragment |
1988 | | { |
1989 | | char chars[24]; |
1990 | | size_t length; |
1991 | | double value; |
1992 | | }; |
1993 | | fragment fragments[] = { |
1994 | | {}, // mantissa, whole part |
1995 | | {}, // mantissa, fractional part |
1996 | | {} // exponent |
1997 | | }; |
1998 | | fragment* current_fragment = fragments; |
1999 | | const utf8_codepoint* prev = {}; |
2000 | | int exponent_sign = 1; |
2001 | | while (!is_eof() && !is_value_terminator(*cp)) |
2002 | | { |
2003 | | if (*cp == U'_') |
2004 | | { |
2005 | | if (!prev || !is_hexadecimal_digit(*prev)) |
2006 | | set_error_and_return_default("underscores may only follow digits"sv); |
2007 | | |
2008 | | prev = cp; |
2009 | | advance_and_return_if_error_or_eof({}); |
2010 | | continue; |
2011 | | } |
2012 | | else if (prev && *prev == U'_' && !is_hexadecimal_digit(*cp)) |
2013 | | set_error_and_return_default("underscores must be followed by digits"sv); |
2014 | | else if (*cp == U'.') |
2015 | | { |
2016 | | // 0x10.0p-.0 (exponent cannot have '.') |
2017 | | if (current_fragment == fragments + 2) |
2018 | | set_error_and_return_default("expected exponent digit or sign, saw '.'"sv); |
2019 | | |
2020 | | // 0x10.0.p-0 (multiple '.') |
2021 | | else if (current_fragment == fragments + 1) |
2022 | | set_error_and_return_default("expected hexadecimal digit or exponent, saw '.'"sv); |
2023 | | |
2024 | | else |
2025 | | current_fragment++; |
2026 | | } |
2027 | | else if (is_match(*cp, U'p', U'P')) |
2028 | | { |
2029 | | // 0x10.0pp-0 (multiple 'p') |
2030 | | if (current_fragment == fragments + 2) |
2031 | | set_error_and_return_default("expected exponent digit or sign, saw '"sv, to_sv(*cp), "'"sv); |
2032 | | |
2033 | | // 0x.p-0 (mantissa is just '.') |
2034 | | else if (fragments[0].length == 0u && fragments[1].length == 0u) |
2035 | | set_error_and_return_default("expected hexadecimal digit, saw '"sv, to_sv(*cp), "'"sv); |
2036 | | |
2037 | | else |
2038 | | current_fragment = fragments + 2; |
2039 | | } |
2040 | | else if (is_match(*cp, U'+', U'-')) |
2041 | | { |
2042 | | // 0x-10.0p-0 (sign in mantissa) |
2043 | | if (current_fragment != fragments + 2) |
2044 | | set_error_and_return_default("expected hexadecimal digit or '.', saw '"sv, to_sv(*cp), "'"sv); |
2045 | | |
2046 | | // 0x10.0p0- (misplaced exponent sign) |
2047 | | else if (!is_match(*prev, U'p', U'P')) |
2048 | | set_error_and_return_default("expected exponent digit, saw '"sv, to_sv(*cp), "'"sv); |
2049 | | |
2050 | | else |
2051 | | exponent_sign = *cp == U'-' ? -1 : 1; |
2052 | | } |
2053 | | else if (current_fragment < fragments + 2 && !is_hexadecimal_digit(*cp)) |
2054 | | set_error_and_return_default("expected hexadecimal digit or '.', saw '"sv, to_sv(*cp), "'"sv); |
2055 | | else if (current_fragment == fragments + 2 && !is_decimal_digit(*cp)) |
2056 | | set_error_and_return_default("expected exponent digit or sign, saw '"sv, to_sv(*cp), "'"sv); |
2057 | | else if (current_fragment->length == sizeof(fragment::chars)) |
2058 | | set_error_and_return_default("fragment exceeeds maximum length of "sv, |
2059 | | sizeof(fragment::chars), |
2060 | | " characters"sv); |
2061 | | else |
2062 | | current_fragment->chars[current_fragment->length++] = static_cast<char>(cp->bytes[0]); |
2063 | | |
2064 | | prev = cp; |
2065 | | advance_and_return_if_error({}); |
2066 | | } |
2067 | | |
2068 | | // sanity-check ending state |
2069 | | if (current_fragment != fragments + 2 || current_fragment->length == 0u) |
2070 | | { |
2071 | | set_error_and_return_if_eof({}); |
2072 | | set_error_and_return_default("missing exponent"sv); |
2073 | | } |
2074 | | else if (prev && *prev == U'_') |
2075 | | { |
2076 | | set_error_and_return_if_eof({}); |
2077 | | set_error_and_return_default("underscores must be followed by digits"sv); |
2078 | | } |
2079 | | |
2080 | | // calculate values for the three fragments |
2081 | | for (int fragment_idx = 0; fragment_idx < 3; fragment_idx++) |
2082 | | { |
2083 | | auto& f = fragments[fragment_idx]; |
2084 | | const uint32_t base = fragment_idx == 2 ? 10u : 16u; |
2085 | | |
2086 | | // left-trim zeroes |
2087 | | const char* c = f.chars; |
2088 | | size_t sig = {}; |
2089 | | while (f.length && *c == '0') |
2090 | | { |
2091 | | f.length--; |
2092 | | c++; |
2093 | | sig++; |
2094 | | } |
2095 | | if (!f.length) |
2096 | | continue; |
2097 | | |
2098 | | // calculate value |
2099 | | auto place = 1u; |
2100 | | for (size_t i = 0; i < f.length - 1u; i++) |
2101 | | place *= base; |
2102 | | uint32_t val{}; |
2103 | | while (place) |
2104 | | { |
2105 | | if (base == 16) |
2106 | | val += place * hex_to_dec(*c); |
2107 | | else |
2108 | | val += place * static_cast<uint32_t>(*c - '0'); |
2109 | | if (fragment_idx == 1) |
2110 | | sig++; |
2111 | | c++; |
2112 | | place /= base; |
2113 | | } |
2114 | | f.value = static_cast<double>(val); |
2115 | | |
2116 | | // shift the fractional part |
2117 | | if (fragment_idx == 1) |
2118 | | { |
2119 | | while (sig--) |
2120 | | f.value /= base; |
2121 | | } |
2122 | | } |
2123 | | |
2124 | | return (fragments[0].value + fragments[1].value) * pow(2.0, fragments[2].value * exponent_sign) * sign; |
2125 | | |
2126 | | #else // !TOML_LANG_UNRELEASED |
2127 | | |
2128 | 9 | set_error_and_return_default("hexadecimal floating-point values are not supported " |
2129 | 9 | "in TOML 1.0.0 and earlier"sv); |
2130 | | |
2131 | 9 | #endif // !TOML_LANG_UNRELEASED |
2132 | 9 | } |
2133 | | |
2134 | | template <uint64_t base> |
2135 | | TOML_NODISCARD |
2136 | | TOML_NEVER_INLINE |
2137 | | int64_t parse_integer() |
2138 | 15.2k | { |
2139 | 15.2k | return_if_error({}); |
2140 | 15.2k | assert_not_eof(); |
2141 | 15.2k | using traits = parse_integer_traits<base>; |
2142 | 15.2k | push_parse_scope(traits::scope_qualifier); |
2143 | | |
2144 | 15.2k | [[maybe_unused]] int64_t sign = 1; |
2145 | | if constexpr (traits::is_signed) |
2146 | 11.7k | { |
2147 | 11.7k | sign = *cp == U'-' ? -1 : 1; |
2148 | 11.7k | if (is_match(*cp, U'+', U'-')) |
2149 | 2.82k | advance_and_return_if_error_or_eof({}); |
2150 | 11.7k | } |
2151 | | |
2152 | | if constexpr (base == 10) |
2153 | 11.7k | { |
2154 | 11.7k | if (!traits::is_digit(*cp)) |
2155 | 11.7k | set_error_and_return_default("expected expected digit or sign, saw '"sv, to_sv(*cp), "'"sv); |
2156 | | } |
2157 | | else |
2158 | 3.45k | { |
2159 | | // '0' |
2160 | 3.45k | if (*cp != U'0') |
2161 | 3.45k | set_error_and_return_default("expected '0', saw '"sv, to_sv(*cp), "'"sv); |
2162 | 3.45k | advance_and_return_if_error_or_eof({}); |
2163 | | |
2164 | | // 'b', 'o', 'x' |
2165 | 3.45k | if (*cp != traits::prefix_codepoint) |
2166 | 3.45k | set_error_and_return_default("expected '"sv, traits::prefix, "', saw '"sv, to_sv(*cp), "'"sv); |
2167 | 3.44k | advance_and_return_if_error_or_eof({}); |
2168 | | |
2169 | 3.43k | if (!traits::is_digit(*cp)) |
2170 | 3.43k | set_error_and_return_default("expected digit, saw '"sv, to_sv(*cp), "'"sv); |
2171 | 3.43k | } |
2172 | | |
2173 | | // consume digits |
2174 | 15.0k | char digits[utf8_buffered_reader::max_history_length]; |
2175 | 15.2k | size_t length = {}; |
2176 | 15.2k | const utf8_codepoint* prev = {}; |
2177 | 124k | while (!is_eof() && !is_value_terminator(*cp)) |
2178 | 109k | { |
2179 | 109k | if (*cp == U'_') |
2180 | 16.4k | { |
2181 | 16.4k | if (!prev || !traits::is_digit(*prev)) |
2182 | 16.4k | set_error_and_return_default("underscores may only follow digits"sv); |
2183 | | |
2184 | 16.3k | prev = cp; |
2185 | 16.3k | advance_and_return_if_error_or_eof({}); |
2186 | 16.3k | continue; |
2187 | 16.3k | } |
2188 | 93.1k | else if TOML_UNLIKELY(prev && *prev == U'_' && !traits::is_digit(*cp)) |
2189 | 93.1k | set_error_and_return_default("underscores must be followed by digits"sv); |
2190 | 93.1k | else if TOML_UNLIKELY(!traits::is_digit(*cp)) |
2191 | 93.1k | set_error_and_return_default("expected digit, saw '"sv, to_sv(*cp), "'"sv); |
2192 | 92.9k | else if TOML_UNLIKELY(length == sizeof(digits)) |
2193 | 92.9k | set_error_and_return_default("exceeds length limit of "sv, sizeof(digits), " digits"sv); |
2194 | 92.9k | else |
2195 | 92.9k | digits[length++] = static_cast<char>(cp->bytes[0]); |
2196 | | |
2197 | 92.9k | prev = cp; |
2198 | 92.9k | advance_and_return_if_error({}); |
2199 | 92.9k | } |
2200 | | |
2201 | | // sanity check ending state |
2202 | 14.9k | if (prev && *prev == U'_') |
2203 | 4 | { |
2204 | 4 | set_error_and_return_if_eof({}); |
2205 | 4 | set_error_and_return_default("underscores must be followed by digits"sv); |
2206 | 4 | } |
2207 | | |
2208 | | // single digits can be converted trivially |
2209 | 14.9k | if (length == 1u) |
2210 | 1.17k | { |
2211 | 1.17k | int64_t result; |
2212 | | |
2213 | | if constexpr (base == 16) |
2214 | 549 | result = static_cast<int64_t>(hex_to_dec(digits[0])); |
2215 | | else |
2216 | 628 | result = static_cast<int64_t>(digits[0] - '0'); |
2217 | | |
2218 | | if constexpr (traits::is_signed) |
2219 | 0 | result *= sign; |
2220 | | |
2221 | 1.17k | return result; |
2222 | 1.17k | } |
2223 | | |
2224 | | // bin, oct and hex allow leading zeroes so trim them first |
2225 | 13.7k | const char* end = digits + length; |
2226 | 13.7k | const char* msd = digits; |
2227 | | if constexpr (base != 10) |
2228 | 2.10k | { |
2229 | 19.0k | while (msd < end && *msd == '0') |
2230 | 16.9k | msd++; |
2231 | 2.10k | if (msd == end) |
2232 | 784 | return 0ll; |
2233 | | } |
2234 | | |
2235 | | // decimal integers do not allow leading zeroes |
2236 | | else |
2237 | 11.6k | { |
2238 | 11.6k | if TOML_UNLIKELY(digits[0] == '0') |
2239 | 11.6k | set_error_and_return_default("leading zeroes are prohibited"sv); |
2240 | 11.6k | } |
2241 | | |
2242 | | // range check |
2243 | 13.7k | if TOML_UNLIKELY(static_cast<size_t>(end - msd) > traits::max_digits) |
2244 | 13.7k | set_error_and_return_default("'"sv, |
2245 | 13.7k | traits::full_prefix, |
2246 | 13.7k | std::string_view{ digits, length }, |
2247 | 13.7k | "' is not representable as a signed 64-bit integer"sv); |
2248 | | |
2249 | | // do the thing |
2250 | 13.7k | { |
2251 | 13.7k | uint64_t result = {}; |
2252 | 13.7k | { |
2253 | 13.7k | uint64_t power = 1; |
2254 | 85.3k | while (--end >= msd) |
2255 | 71.6k | { |
2256 | | if constexpr (base == 16) |
2257 | 3.78k | result += power * hex_to_dec(*end); |
2258 | | else |
2259 | 67.8k | result += power * static_cast<uint64_t>(*end - '0'); |
2260 | | |
2261 | 71.6k | power *= base; |
2262 | 71.6k | } |
2263 | 13.7k | } |
2264 | | |
2265 | | // range check |
2266 | 13.7k | static constexpr auto i64_max = static_cast<uint64_t>((std::numeric_limits<int64_t>::max)()); |
2267 | 13.7k | if TOML_UNLIKELY(result > i64_max + (sign < 0 ? 1u : 0u)) |
2268 | 13.7k | set_error_and_return_default("'"sv, |
2269 | 13.6k | traits::full_prefix, |
2270 | 13.6k | std::string_view{ digits, length }, |
2271 | 13.6k | "' is not representable as a signed 64-bit integer"sv); |
2272 | | |
2273 | | if constexpr (traits::is_signed) |
2274 | 11.5k | { |
2275 | | // avoid signed multiply UB when parsing INT64_MIN |
2276 | 11.5k | if TOML_UNLIKELY(sign < 0 && result == i64_max + 1u) |
2277 | 178 | return (std::numeric_limits<int64_t>::min)(); |
2278 | | |
2279 | 11.4k | return static_cast<int64_t>(result) * sign; |
2280 | | } |
2281 | | else |
2282 | 2.07k | return static_cast<int64_t>(result); |
2283 | 13.6k | } |
2284 | 13.6k | } long toml::v3::impl::impl_ex::parser::parse_integer<16ul>() Line | Count | Source | 2138 | 1.42k | { | 2139 | 1.42k | return_if_error({}); | 2140 | 1.42k | assert_not_eof(); | 2141 | 1.42k | using traits = parse_integer_traits<base>; | 2142 | 1.42k | push_parse_scope(traits::scope_qualifier); | 2143 | | | 2144 | 1.42k | [[maybe_unused]] int64_t sign = 1; | 2145 | | if constexpr (traits::is_signed) | 2146 | | { | 2147 | | sign = *cp == U'-' ? -1 : 1; | 2148 | | if (is_match(*cp, U'+', U'-')) | 2149 | | advance_and_return_if_error_or_eof({}); | 2150 | | } | 2151 | | | 2152 | | if constexpr (base == 10) | 2153 | | { | 2154 | | if (!traits::is_digit(*cp)) | 2155 | | set_error_and_return_default("expected expected digit or sign, saw '"sv, to_sv(*cp), "'"sv); | 2156 | | } | 2157 | | else | 2158 | 1.42k | { | 2159 | | // '0' | 2160 | 1.42k | if (*cp != U'0') | 2161 | 1.42k | set_error_and_return_default("expected '0', saw '"sv, to_sv(*cp), "'"sv); | 2162 | 1.41k | advance_and_return_if_error_or_eof({}); | 2163 | | | 2164 | | // 'b', 'o', 'x' | 2165 | 1.41k | if (*cp != traits::prefix_codepoint) | 2166 | 1.41k | set_error_and_return_default("expected '"sv, traits::prefix, "', saw '"sv, to_sv(*cp), "'"sv); | 2167 | 1.41k | advance_and_return_if_error_or_eof({}); | 2168 | | | 2169 | 1.41k | if (!traits::is_digit(*cp)) | 2170 | 1.41k | set_error_and_return_default("expected digit, saw '"sv, to_sv(*cp), "'"sv); | 2171 | 1.41k | } | 2172 | | | 2173 | | // consume digits | 2174 | 1.39k | char digits[utf8_buffered_reader::max_history_length]; | 2175 | 1.42k | size_t length = {}; | 2176 | 1.42k | const utf8_codepoint* prev = {}; | 2177 | 29.4k | while (!is_eof() && !is_value_terminator(*cp)) | 2178 | 28.1k | { | 2179 | 28.1k | if (*cp == U'_') | 2180 | 10.9k | { | 2181 | 10.9k | if (!prev || !traits::is_digit(*prev)) | 2182 | 10.9k | set_error_and_return_default("underscores may only follow digits"sv); | 2183 | | | 2184 | 10.9k | prev = cp; | 2185 | 10.9k | 
advance_and_return_if_error_or_eof({}); | 2186 | 10.9k | continue; | 2187 | 10.9k | } | 2188 | 17.1k | else if TOML_UNLIKELY(prev && *prev == U'_' && !traits::is_digit(*cp)) | 2189 | 17.1k | set_error_and_return_default("underscores must be followed by digits"sv); | 2190 | 17.1k | else if TOML_UNLIKELY(!traits::is_digit(*cp)) | 2191 | 17.1k | set_error_and_return_default("expected digit, saw '"sv, to_sv(*cp), "'"sv); | 2192 | 17.1k | else if TOML_UNLIKELY(length == sizeof(digits)) | 2193 | 17.1k | set_error_and_return_default("exceeds length limit of "sv, sizeof(digits), " digits"sv); | 2194 | 17.1k | else | 2195 | 17.1k | digits[length++] = static_cast<char>(cp->bytes[0]); | 2196 | | | 2197 | 17.1k | prev = cp; | 2198 | 17.1k | advance_and_return_if_error({}); | 2199 | 17.1k | } | 2200 | | | 2201 | | // sanity check ending state | 2202 | 1.36k | if (prev && *prev == U'_') | 2203 | 1 | { | 2204 | 1 | set_error_and_return_if_eof({}); | 2205 | 1 | set_error_and_return_default("underscores must be followed by digits"sv); | 2206 | 1 | } | 2207 | | | 2208 | | // single digits can be converted trivially | 2209 | 1.36k | if (length == 1u) | 2210 | 549 | { | 2211 | 549 | int64_t result; | 2212 | | | 2213 | | if constexpr (base == 16) | 2214 | 549 | result = static_cast<int64_t>(hex_to_dec(digits[0])); | 2215 | | else | 2216 | | result = static_cast<int64_t>(digits[0] - '0'); | 2217 | | | 2218 | | if constexpr (traits::is_signed) | 2219 | | result *= sign; | 2220 | | | 2221 | 549 | return result; | 2222 | 549 | } | 2223 | | | 2224 | | // bin, oct and hex allow leading zeroes so trim them first | 2225 | 817 | const char* end = digits + length; | 2226 | 817 | const char* msd = digits; | 2227 | | if constexpr (base != 10) | 2228 | 817 | { | 2229 | 12.7k | while (msd < end && *msd == '0') | 2230 | 11.9k | msd++; | 2231 | 817 | if (msd == end) | 2232 | 214 | return 0ll; | 2233 | | } | 2234 | | | 2235 | | // decimal integers do not allow leading zeroes | 2236 | | else | 2237 | | 
{ | 2238 | | if TOML_UNLIKELY(digits[0] == '0') | 2239 | | set_error_and_return_default("leading zeroes are prohibited"sv); | 2240 | | } | 2241 | | | 2242 | | // range check | 2243 | 817 | if TOML_UNLIKELY(static_cast<size_t>(end - msd) > traits::max_digits) | 2244 | 817 | set_error_and_return_default("'"sv, | 2245 | 811 | traits::full_prefix, | 2246 | 811 | std::string_view{ digits, length }, | 2247 | 811 | "' is not representable as a signed 64-bit integer"sv); | 2248 | | | 2249 | | // do the thing | 2250 | 811 | { | 2251 | 811 | uint64_t result = {}; | 2252 | 811 | { | 2253 | 811 | uint64_t power = 1; | 2254 | 4.59k | while (--end >= msd) | 2255 | 3.78k | { | 2256 | | if constexpr (base == 16) | 2257 | 3.78k | result += power * hex_to_dec(*end); | 2258 | | else | 2259 | | result += power * static_cast<uint64_t>(*end - '0'); | 2260 | | | 2261 | 3.78k | power *= base; | 2262 | 3.78k | } | 2263 | 811 | } | 2264 | | | 2265 | | // range check | 2266 | 811 | static constexpr auto i64_max = static_cast<uint64_t>((std::numeric_limits<int64_t>::max)()); | 2267 | 811 | if TOML_UNLIKELY(result > i64_max + (sign < 0 ? 1u : 0u)) | 2268 | 811 | set_error_and_return_default("'"sv, | 2269 | 793 | traits::full_prefix, | 2270 | 793 | std::string_view{ digits, length }, | 2271 | 793 | "' is not representable as a signed 64-bit integer"sv); | 2272 | | | 2273 | | if constexpr (traits::is_signed) | 2274 | | { | 2275 | | // avoid signed multiply UB when parsing INT64_MIN | 2276 | | if TOML_UNLIKELY(sign < 0 && result == i64_max + 1u) | 2277 | | return (std::numeric_limits<int64_t>::min)(); | 2278 | | | 2279 | | return static_cast<int64_t>(result) * sign; | 2280 | | } | 2281 | | else | 2282 | 793 | return static_cast<int64_t>(result); | 2283 | 793 | } | 2284 | 793 | } |
long toml::v3::impl::impl_ex::parser::parse_integer<8ul>() Line | Count | Source | 2138 | 1.01k | { | 2139 | 1.01k | return_if_error({}); | 2140 | 1.01k | assert_not_eof(); | 2141 | 1.01k | using traits = parse_integer_traits<base>; | 2142 | 1.01k | push_parse_scope(traits::scope_qualifier); | 2143 | | | 2144 | 1.01k | [[maybe_unused]] int64_t sign = 1; | 2145 | | if constexpr (traits::is_signed) | 2146 | | { | 2147 | | sign = *cp == U'-' ? -1 : 1; | 2148 | | if (is_match(*cp, U'+', U'-')) | 2149 | | advance_and_return_if_error_or_eof({}); | 2150 | | } | 2151 | | | 2152 | | if constexpr (base == 10) | 2153 | | { | 2154 | | if (!traits::is_digit(*cp)) | 2155 | | set_error_and_return_default("expected expected digit or sign, saw '"sv, to_sv(*cp), "'"sv); | 2156 | | } | 2157 | | else | 2158 | 1.01k | { | 2159 | | // '0' | 2160 | 1.01k | if (*cp != U'0') | 2161 | 1.01k | set_error_and_return_default("expected '0', saw '"sv, to_sv(*cp), "'"sv); | 2162 | 1.01k | advance_and_return_if_error_or_eof({}); | 2163 | | | 2164 | | // 'b', 'o', 'x' | 2165 | 1.01k | if (*cp != traits::prefix_codepoint) | 2166 | 1.01k | set_error_and_return_default("expected '"sv, traits::prefix, "', saw '"sv, to_sv(*cp), "'"sv); | 2167 | 1.00k | advance_and_return_if_error_or_eof({}); | 2168 | | | 2169 | 1.00k | if (!traits::is_digit(*cp)) | 2170 | 1.00k | set_error_and_return_default("expected digit, saw '"sv, to_sv(*cp), "'"sv); | 2171 | 1.00k | } | 2172 | | | 2173 | | // consume digits | 2174 | 974 | char digits[utf8_buffered_reader::max_history_length]; | 2175 | 1.01k | size_t length = {}; | 2176 | 1.01k | const utf8_codepoint* prev = {}; | 2177 | 7.01k | while (!is_eof() && !is_value_terminator(*cp)) | 2178 | 6.05k | { | 2179 | 6.05k | if (*cp == U'_') | 2180 | 747 | { | 2181 | 747 | if (!prev || !traits::is_digit(*prev)) | 2182 | 747 | set_error_and_return_default("underscores may only follow digits"sv); | 2183 | | | 2184 | 743 | prev = cp; | 2185 | 743 | 
advance_and_return_if_error_or_eof({}); | 2186 | 741 | continue; | 2187 | 743 | } | 2188 | 5.30k | else if TOML_UNLIKELY(prev && *prev == U'_' && !traits::is_digit(*cp)) | 2189 | 5.30k | set_error_and_return_default("underscores must be followed by digits"sv); | 2190 | 5.29k | else if TOML_UNLIKELY(!traits::is_digit(*cp)) | 2191 | 5.29k | set_error_and_return_default("expected digit, saw '"sv, to_sv(*cp), "'"sv); | 2192 | 5.26k | else if TOML_UNLIKELY(length == sizeof(digits)) | 2193 | 5.26k | set_error_and_return_default("exceeds length limit of "sv, sizeof(digits), " digits"sv); | 2194 | 5.26k | else | 2195 | 5.26k | digits[length++] = static_cast<char>(cp->bytes[0]); | 2196 | | | 2197 | 5.26k | prev = cp; | 2198 | 5.26k | advance_and_return_if_error({}); | 2199 | 5.26k | } | 2200 | | | 2201 | | // sanity check ending state | 2202 | 961 | if (prev && *prev == U'_') | 2203 | 1 | { | 2204 | 1 | set_error_and_return_if_eof({}); | 2205 | 1 | set_error_and_return_default("underscores must be followed by digits"sv); | 2206 | 1 | } | 2207 | | | 2208 | | // single digits can be converted trivially | 2209 | 960 | if (length == 1u) | 2210 | 234 | { | 2211 | 234 | int64_t result; | 2212 | | | 2213 | | if constexpr (base == 16) | 2214 | | result = static_cast<int64_t>(hex_to_dec(digits[0])); | 2215 | | else | 2216 | 234 | result = static_cast<int64_t>(digits[0] - '0'); | 2217 | | | 2218 | | if constexpr (traits::is_signed) | 2219 | | result *= sign; | 2220 | | | 2221 | 234 | return result; | 2222 | 234 | } | 2223 | | | 2224 | | // bin, oct and hex allow leading zeroes so trim them first | 2225 | 726 | const char* end = digits + length; | 2226 | 726 | const char* msd = digits; | 2227 | | if constexpr (base != 10) | 2228 | 726 | { | 2229 | 3.48k | while (msd < end && *msd == '0') | 2230 | 2.76k | msd++; | 2231 | 726 | if (msd == end) | 2232 | 348 | return 0ll; | 2233 | | } | 2234 | | | 2235 | | // decimal integers do not allow leading zeroes | 2236 | | else | 2237 | | { | 2238 
| | if TOML_UNLIKELY(digits[0] == '0') | 2239 | | set_error_and_return_default("leading zeroes are prohibited"sv); | 2240 | | } | 2241 | | | 2242 | | // range check | 2243 | 726 | if TOML_UNLIKELY(static_cast<size_t>(end - msd) > traits::max_digits) | 2244 | 726 | set_error_and_return_default("'"sv, | 2245 | 725 | traits::full_prefix, | 2246 | 725 | std::string_view{ digits, length }, | 2247 | 725 | "' is not representable as a signed 64-bit integer"sv); | 2248 | | | 2249 | | // do the thing | 2250 | 725 | { | 2251 | 725 | uint64_t result = {}; | 2252 | 725 | { | 2253 | 725 | uint64_t power = 1; | 2254 | 2.43k | while (--end >= msd) | 2255 | 1.70k | { | 2256 | | if constexpr (base == 16) | 2257 | | result += power * hex_to_dec(*end); | 2258 | | else | 2259 | 1.70k | result += power * static_cast<uint64_t>(*end - '0'); | 2260 | | | 2261 | 1.70k | power *= base; | 2262 | 1.70k | } | 2263 | 725 | } | 2264 | | | 2265 | | // range check | 2266 | 725 | static constexpr auto i64_max = static_cast<uint64_t>((std::numeric_limits<int64_t>::max)()); | 2267 | 725 | if TOML_UNLIKELY(result > i64_max + (sign < 0 ? 1u : 0u)) | 2268 | 725 | set_error_and_return_default("'"sv, | 2269 | 725 | traits::full_prefix, | 2270 | 725 | std::string_view{ digits, length }, | 2271 | 725 | "' is not representable as a signed 64-bit integer"sv); | 2272 | | | 2273 | | if constexpr (traits::is_signed) | 2274 | | { | 2275 | | // avoid signed multiply UB when parsing INT64_MIN | 2276 | | if TOML_UNLIKELY(sign < 0 && result == i64_max + 1u) | 2277 | | return (std::numeric_limits<int64_t>::min)(); | 2278 | | | 2279 | | return static_cast<int64_t>(result) * sign; | 2280 | | } | 2281 | | else | 2282 | 725 | return static_cast<int64_t>(result); | 2283 | 725 | } | 2284 | 725 | } |
long toml::v3::impl::impl_ex::parser::parse_integer<2ul>() Line | Count | Source | 2138 | 1.02k | { | 2139 | 1.02k | return_if_error({}); | 2140 | 1.02k | assert_not_eof(); | 2141 | 1.02k | using traits = parse_integer_traits<base>; | 2142 | 1.02k | push_parse_scope(traits::scope_qualifier); | 2143 | | | 2144 | 1.02k | [[maybe_unused]] int64_t sign = 1; | 2145 | | if constexpr (traits::is_signed) | 2146 | | { | 2147 | | sign = *cp == U'-' ? -1 : 1; | 2148 | | if (is_match(*cp, U'+', U'-')) | 2149 | | advance_and_return_if_error_or_eof({}); | 2150 | | } | 2151 | | | 2152 | | if constexpr (base == 10) | 2153 | | { | 2154 | | if (!traits::is_digit(*cp)) | 2155 | | set_error_and_return_default("expected expected digit or sign, saw '"sv, to_sv(*cp), "'"sv); | 2156 | | } | 2157 | | else | 2158 | 1.02k | { | 2159 | | // '0' | 2160 | 1.02k | if (*cp != U'0') | 2161 | 1.02k | set_error_and_return_default("expected '0', saw '"sv, to_sv(*cp), "'"sv); | 2162 | 1.02k | advance_and_return_if_error_or_eof({}); | 2163 | | | 2164 | | // 'b', 'o', 'x' | 2165 | 1.02k | if (*cp != traits::prefix_codepoint) | 2166 | 1.02k | set_error_and_return_default("expected '"sv, traits::prefix, "', saw '"sv, to_sv(*cp), "'"sv); | 2167 | 1.02k | advance_and_return_if_error_or_eof({}); | 2168 | | | 2169 | 1.01k | if (!traits::is_digit(*cp)) | 2170 | 1.01k | set_error_and_return_default("expected digit, saw '"sv, to_sv(*cp), "'"sv); | 2171 | 1.01k | } | 2172 | | | 2173 | | // consume digits | 2174 | 989 | char digits[utf8_buffered_reader::max_history_length]; | 2175 | 1.02k | size_t length = {}; | 2176 | 1.02k | const utf8_codepoint* prev = {}; | 2177 | 9.76k | while (!is_eof() && !is_value_terminator(*cp)) | 2178 | 8.81k | { | 2179 | 8.81k | if (*cp == U'_') | 2180 | 1.53k | { | 2181 | 1.53k | if (!prev || !traits::is_digit(*prev)) | 2182 | 1.53k | set_error_and_return_default("underscores may only follow digits"sv); | 2183 | | | 2184 | 1.51k | prev = cp; | 2185 | 1.51k | 
advance_and_return_if_error_or_eof({}); | 2186 | 1.51k | continue; | 2187 | 1.51k | } | 2188 | 7.27k | else if TOML_UNLIKELY(prev && *prev == U'_' && !traits::is_digit(*cp)) | 2189 | 7.27k | set_error_and_return_default("underscores must be followed by digits"sv); | 2190 | 7.26k | else if TOML_UNLIKELY(!traits::is_digit(*cp)) | 2191 | 7.26k | set_error_and_return_default("expected digit, saw '"sv, to_sv(*cp), "'"sv); | 2192 | 7.23k | else if TOML_UNLIKELY(length == sizeof(digits)) | 2193 | 7.23k | set_error_and_return_default("exceeds length limit of "sv, sizeof(digits), " digits"sv); | 2194 | 7.23k | else | 2195 | 7.23k | digits[length++] = static_cast<char>(cp->bytes[0]); | 2196 | | | 2197 | 7.23k | prev = cp; | 2198 | 7.23k | advance_and_return_if_error({}); | 2199 | 7.23k | } | 2200 | | | 2201 | | // sanity check ending state | 2202 | 955 | if (prev && *prev == U'_') | 2203 | 1 | { | 2204 | 1 | set_error_and_return_if_eof({}); | 2205 | 1 | set_error_and_return_default("underscores must be followed by digits"sv); | 2206 | 1 | } | 2207 | | | 2208 | | // single digits can be converted trivially | 2209 | 954 | if (length == 1u) | 2210 | 394 | { | 2211 | 394 | int64_t result; | 2212 | | | 2213 | | if constexpr (base == 16) | 2214 | | result = static_cast<int64_t>(hex_to_dec(digits[0])); | 2215 | | else | 2216 | 394 | result = static_cast<int64_t>(digits[0] - '0'); | 2217 | | | 2218 | | if constexpr (traits::is_signed) | 2219 | | result *= sign; | 2220 | | | 2221 | 394 | return result; | 2222 | 394 | } | 2223 | | | 2224 | | // bin, oct and hex allow leading zeroes so trim them first | 2225 | 560 | const char* end = digits + length; | 2226 | 560 | const char* msd = digits; | 2227 | | if constexpr (base != 10) | 2228 | 560 | { | 2229 | 2.73k | while (msd < end && *msd == '0') | 2230 | 2.17k | msd++; | 2231 | 560 | if (msd == end) | 2232 | 222 | return 0ll; | 2233 | | } | 2234 | | | 2235 | | // decimal integers do not allow leading zeroes | 2236 | | else | 2237 | | { | 
2238 | | if TOML_UNLIKELY(digits[0] == '0') | 2239 | | set_error_and_return_default("leading zeroes are prohibited"sv); | 2240 | | } | 2241 | | | 2242 | | // range check | 2243 | 560 | if TOML_UNLIKELY(static_cast<size_t>(end - msd) > traits::max_digits) | 2244 | 560 | set_error_and_return_default("'"sv, | 2245 | 559 | traits::full_prefix, | 2246 | 559 | std::string_view{ digits, length }, | 2247 | 559 | "' is not representable as a signed 64-bit integer"sv); | 2248 | | | 2249 | | // do the thing | 2250 | 559 | { | 2251 | 559 | uint64_t result = {}; | 2252 | 559 | { | 2253 | 559 | uint64_t power = 1; | 2254 | 3.88k | while (--end >= msd) | 2255 | 3.32k | { | 2256 | | if constexpr (base == 16) | 2257 | | result += power * hex_to_dec(*end); | 2258 | | else | 2259 | 3.32k | result += power * static_cast<uint64_t>(*end - '0'); | 2260 | | | 2261 | 3.32k | power *= base; | 2262 | 3.32k | } | 2263 | 559 | } | 2264 | | | 2265 | | // range check | 2266 | 559 | static constexpr auto i64_max = static_cast<uint64_t>((std::numeric_limits<int64_t>::max)()); | 2267 | 559 | if TOML_UNLIKELY(result > i64_max + (sign < 0 ? 1u : 0u)) | 2268 | 559 | set_error_and_return_default("'"sv, | 2269 | 559 | traits::full_prefix, | 2270 | 559 | std::string_view{ digits, length }, | 2271 | 559 | "' is not representable as a signed 64-bit integer"sv); | 2272 | | | 2273 | | if constexpr (traits::is_signed) | 2274 | | { | 2275 | | // avoid signed multiply UB when parsing INT64_MIN | 2276 | | if TOML_UNLIKELY(sign < 0 && result == i64_max + 1u) | 2277 | | return (std::numeric_limits<int64_t>::min)(); | 2278 | | | 2279 | | return static_cast<int64_t>(result) * sign; | 2280 | | } | 2281 | | else | 2282 | 559 | return static_cast<int64_t>(result); | 2283 | 559 | } | 2284 | 559 | } |
long toml::v3::impl::impl_ex::parser::parse_integer<10ul>() Line | Count | Source | 2138 | 11.7k | { | 2139 | 11.7k | return_if_error({}); | 2140 | 11.7k | assert_not_eof(); | 2141 | 11.7k | using traits = parse_integer_traits<base>; | 2142 | 11.7k | push_parse_scope(traits::scope_qualifier); | 2143 | | | 2144 | 11.7k | [[maybe_unused]] int64_t sign = 1; | 2145 | | if constexpr (traits::is_signed) | 2146 | 11.7k | { | 2147 | 11.7k | sign = *cp == U'-' ? -1 : 1; | 2148 | 11.7k | if (is_match(*cp, U'+', U'-')) | 2149 | 2.82k | advance_and_return_if_error_or_eof({}); | 2150 | 11.7k | } | 2151 | | | 2152 | | if constexpr (base == 10) | 2153 | 11.7k | { | 2154 | 11.7k | if (!traits::is_digit(*cp)) | 2155 | 11.7k | set_error_and_return_default("expected expected digit or sign, saw '"sv, to_sv(*cp), "'"sv); | 2156 | | } | 2157 | | else | 2158 | | { | 2159 | | // '0' | 2160 | | if (*cp != U'0') | 2161 | | set_error_and_return_default("expected '0', saw '"sv, to_sv(*cp), "'"sv); | 2162 | | advance_and_return_if_error_or_eof({}); | 2163 | | | 2164 | | // 'b', 'o', 'x' | 2165 | | if (*cp != traits::prefix_codepoint) | 2166 | | set_error_and_return_default("expected '"sv, traits::prefix, "', saw '"sv, to_sv(*cp), "'"sv); | 2167 | | advance_and_return_if_error_or_eof({}); | 2168 | | | 2169 | | if (!traits::is_digit(*cp)) | 2170 | | set_error_and_return_default("expected digit, saw '"sv, to_sv(*cp), "'"sv); | 2171 | | } | 2172 | | | 2173 | | // consume digits | 2174 | 11.7k | char digits[utf8_buffered_reader::max_history_length]; | 2175 | 11.7k | size_t length = {}; | 2176 | 11.7k | const utf8_codepoint* prev = {}; | 2177 | 78.2k | while (!is_eof() && !is_value_terminator(*cp)) | 2178 | 66.6k | { | 2179 | 66.6k | if (*cp == U'_') | 2180 | 3.20k | { | 2181 | 3.20k | if (!prev || !traits::is_digit(*prev)) | 2182 | 3.20k | set_error_and_return_default("underscores may only follow digits"sv); | 2183 | | | 2184 | 3.19k | prev = cp; | 2185 | 3.19k | 
advance_and_return_if_error_or_eof({}); | 2186 | 3.19k | continue; | 2187 | 3.19k | } | 2188 | 63.3k | else if TOML_UNLIKELY(prev && *prev == U'_' && !traits::is_digit(*cp)) | 2189 | 63.3k | set_error_and_return_default("underscores must be followed by digits"sv); | 2190 | 63.3k | else if TOML_UNLIKELY(!traits::is_digit(*cp)) | 2191 | 63.3k | set_error_and_return_default("expected digit, saw '"sv, to_sv(*cp), "'"sv); | 2192 | 63.2k | else if TOML_UNLIKELY(length == sizeof(digits)) | 2193 | 63.2k | set_error_and_return_default("exceeds length limit of "sv, sizeof(digits), " digits"sv); | 2194 | 63.2k | else | 2195 | 63.2k | digits[length++] = static_cast<char>(cp->bytes[0]); | 2196 | | | 2197 | 63.2k | prev = cp; | 2198 | 63.2k | advance_and_return_if_error({}); | 2199 | 63.2k | } | 2200 | | | 2201 | | // sanity check ending state | 2202 | 11.6k | if (prev && *prev == U'_') | 2203 | 1 | { | 2204 | 1 | set_error_and_return_if_eof({}); | 2205 | 1 | set_error_and_return_default("underscores must be followed by digits"sv); | 2206 | 1 | } | 2207 | | | 2208 | | // single digits can be converted trivially | 2209 | 11.6k | if (length == 1u) | 2210 | 0 | { | 2211 | 0 | int64_t result; | 2212 | |
| 2213 | | if constexpr (base == 16) | 2214 | | result = static_cast<int64_t>(hex_to_dec(digits[0])); | 2215 | | else | 2216 | 0 | result = static_cast<int64_t>(digits[0] - '0'); | 2217 | |
| 2218 | | if constexpr (traits::is_signed) | 2219 | 0 | result *= sign; | 2220 | |
| 2221 | 0 | return result; | 2222 | 0 | } | 2223 | | | 2224 | | // bin, oct and hex allow leading zeroes so trim them first | 2225 | 11.6k | const char* end = digits + length; | 2226 | 11.6k | const char* msd = digits; | 2227 | | if constexpr (base != 10) | 2228 | | { | 2229 | | while (msd < end && *msd == '0') | 2230 | | msd++; | 2231 | | if (msd == end) | 2232 | | return 0ll; | 2233 | | } | 2234 | | | 2235 | | // decimal integers do not allow leading zeroes | 2236 | | else | 2237 | 11.6k | { | 2238 | 11.6k | if TOML_UNLIKELY(digits[0] == '0') | 2239 | 11.6k | set_error_and_return_default("leading zeroes are prohibited"sv); | 2240 | 11.6k | } | 2241 | | | 2242 | | // range check | 2243 | 11.6k | if TOML_UNLIKELY(static_cast<size_t>(end - msd) > traits::max_digits) | 2244 | 11.6k | set_error_and_return_default("'"sv, | 2245 | 11.6k | traits::full_prefix, | 2246 | 11.6k | std::string_view{ digits, length }, | 2247 | 11.6k | "' is not representable as a signed 64-bit integer"sv); | 2248 | | | 2249 | | // do the thing | 2250 | 11.6k | { | 2251 | 11.6k | uint64_t result = {}; | 2252 | 11.6k | { | 2253 | 11.6k | uint64_t power = 1; | 2254 | 74.4k | while (--end >= msd) | 2255 | 62.8k | { | 2256 | | if constexpr (base == 16) | 2257 | | result += power * hex_to_dec(*end); | 2258 | | else | 2259 | 62.8k | result += power * static_cast<uint64_t>(*end - '0'); | 2260 | | | 2261 | 62.8k | power *= base; | 2262 | 62.8k | } | 2263 | 11.6k | } | 2264 | | | 2265 | | // range check | 2266 | 11.6k | static constexpr auto i64_max = static_cast<uint64_t>((std::numeric_limits<int64_t>::max)()); | 2267 | 11.6k | if TOML_UNLIKELY(result > i64_max + (sign < 0 ? 
1u : 0u)) | 2268 | 11.6k | set_error_and_return_default("'"sv, | 2269 | 11.5k | traits::full_prefix, | 2270 | 11.5k | std::string_view{ digits, length }, | 2271 | 11.5k | "' is not representable as a signed 64-bit integer"sv); | 2272 | | | 2273 | | if constexpr (traits::is_signed) | 2274 | 11.5k | { | 2275 | | // avoid signed multiply UB when parsing INT64_MIN | 2276 | 11.5k | if TOML_UNLIKELY(sign < 0 && result == i64_max + 1u) | 2277 | 178 | return (std::numeric_limits<int64_t>::min)(); | 2278 | | | 2279 | 11.4k | return static_cast<int64_t>(result) * sign; | 2280 | | } | 2281 | | else | 2282 | | return static_cast<int64_t>(result); | 2283 | 11.5k | } | 2284 | 11.5k | } |
|
2285 | | |
2286 | | TOML_NODISCARD |
2287 | | TOML_NEVER_INLINE |
2288 | | date parse_date(bool part_of_datetime = false) |
2289 | 8.09k | { |
2290 | 8.09k | return_if_error({}); |
2291 | 8.09k | assert_not_eof(); |
2292 | 8.09k | TOML_ASSERT_ASSUME(is_decimal_digit(*cp)); |
2293 | 8.09k | push_parse_scope("date"sv); |
2294 | | |
2295 | | // "YYYY" |
2296 | 8.09k | uint32_t digits[4]; |
2297 | 8.09k | if (!consume_digit_sequence(digits, 4u)) |
2298 | 8.09k | set_error_and_return_default("expected 4-digit year, saw '"sv, to_sv(cp), "'"sv); |
2299 | 8.02k | const auto year = digits[3] + digits[2] * 10u + digits[1] * 100u + digits[0] * 1000u; |
2300 | 8.02k | const auto is_leap_year = (year % 4u == 0u) && ((year % 100u != 0u) || (year % 400u == 0u)); |
2301 | 8.02k | set_error_and_return_if_eof({}); |
2302 | | |
2303 | | // '-' |
2304 | 8.02k | if (*cp != U'-') |
2305 | 8.02k | set_error_and_return_default("expected '-', saw '"sv, to_sv(*cp), "'"sv); |
2306 | 7.99k | advance_and_return_if_error_or_eof({}); |
2307 | | |
2308 | | // "MM" |
2309 | 7.99k | if (!consume_digit_sequence(digits, 2u)) |
2310 | 7.99k | set_error_and_return_default("expected 2-digit month, saw '"sv, to_sv(cp), "'"sv); |
2311 | 7.96k | const auto month = digits[1] + digits[0] * 10u; |
2312 | 7.96k | if (month == 0u || month > 12u) |
2313 | 7.96k | set_error_and_return_default("expected month between 1 and 12 (inclusive), saw "sv, month); |
2314 | 7.96k | const auto max_days_in_month = month == 2u |
2315 | 7.96k | ? (is_leap_year ? 29u : 28u) |
2316 | 7.96k | : (month == 4u || month == 6u || month == 9u || month == 11u ? 30u : 31u); |
2317 | 7.96k | set_error_and_return_if_eof({}); |
2318 | | |
2319 | | // '-' |
2320 | 7.95k | if (*cp != U'-') |
2321 | 7.95k | set_error_and_return_default("expected '-', saw '"sv, to_sv(*cp), "'"sv); |
2322 | 7.94k | advance_and_return_if_error_or_eof({}); |
2323 | | |
2324 | | // "DD" |
2325 | 7.94k | if (!consume_digit_sequence(digits, 2u)) |
2326 | 7.94k | set_error_and_return_default("expected 2-digit day, saw '"sv, to_sv(cp), "'"sv); |
2327 | 7.91k | const auto day = digits[1] + digits[0] * 10u; |
2328 | 7.91k | if (day == 0u || day > max_days_in_month) |
2329 | 7.91k | set_error_and_return_default("expected day between 1 and "sv, |
2330 | 7.90k | max_days_in_month, |
2331 | 7.90k | " (inclusive), saw "sv, |
2332 | 7.90k | day); |
2333 | | |
2334 | 7.90k | if (!part_of_datetime && !is_eof() && !is_value_terminator(*cp)) |
2335 | 7.90k | set_error_and_return_default("expected value-terminator, saw '"sv, to_sv(*cp), "'"sv); |
2336 | | |
2337 | 7.89k | return { year, month, day }; |
2338 | 7.90k | } |
2339 | | |
2340 | | TOML_NODISCARD |
2341 | | TOML_NEVER_INLINE |
2342 | | time parse_time(bool part_of_datetime = false) |
2343 | 7.42k | { |
2344 | 7.42k | return_if_error({}); |
2345 | 7.42k | assert_not_eof(); |
2346 | 7.42k | TOML_ASSERT_ASSUME(is_decimal_digit(*cp)); |
2347 | 7.42k | push_parse_scope("time"sv); |
2348 | | |
2349 | 7.42k | static constexpr size_t max_digits = 64; // far more than necessary but needed to allow fractional |
2350 | | // millisecond truncation per the spec |
2351 | 7.42k | uint32_t digits[max_digits]; |
2352 | | |
2353 | | // "HH" |
2354 | 7.42k | if (!consume_digit_sequence(digits, 2u)) |
2355 | 7.42k | set_error_and_return_default("expected 2-digit hour, saw '"sv, to_sv(cp), "'"sv); |
2356 | 7.36k | const auto hour = digits[1] + digits[0] * 10u; |
2357 | 7.36k | if (hour > 23u) |
2358 | 7.36k | set_error_and_return_default("expected hour between 0 to 59 (inclusive), saw "sv, hour); |
2359 | 7.35k | set_error_and_return_if_eof({}); |
2360 | | |
2361 | | // ':' |
2362 | 7.35k | if (*cp != U':') |
2363 | 7.35k | set_error_and_return_default("expected ':', saw '"sv, to_sv(*cp), "'"sv); |
2364 | 7.34k | advance_and_return_if_error_or_eof({}); |
2365 | | |
2366 | | // "MM" |
2367 | 7.33k | if (!consume_digit_sequence(digits, 2u)) |
2368 | 7.33k | set_error_and_return_default("expected 2-digit minute, saw '"sv, to_sv(cp), "'"sv); |
2369 | 7.30k | const auto minute = digits[1] + digits[0] * 10u; |
2370 | 7.30k | if (minute > 59u) |
2371 | 7.30k | set_error_and_return_default("expected minute between 0 and 59 (inclusive), saw "sv, minute); |
2372 | 7.30k | auto time = toml::time{ hour, minute }; |
2373 | | |
2374 | | // ':' |
2375 | | if constexpr (TOML_LANG_UNRELEASED) // toml/issues/671 (allow omission of seconds) |
2376 | | { |
2377 | | if (is_eof() || is_value_terminator(*cp) || (part_of_datetime && is_match(*cp, U'+', U'-', U'Z', U'z'))) |
2378 | | return time; |
2379 | | } |
2380 | | else |
2381 | 7.30k | set_error_and_return_if_eof({}); |
2382 | 7.30k | if (*cp != U':') |
2383 | 7.30k | set_error_and_return_default("expected ':', saw '"sv, to_sv(*cp), "'"sv); |
2384 | 7.27k | advance_and_return_if_error_or_eof({}); |
2385 | | |
2386 | | // "SS" |
2387 | 7.27k | if (!consume_digit_sequence(digits, 2u)) |
2388 | 7.27k | set_error_and_return_default("expected 2-digit second, saw '"sv, to_sv(cp), "'"sv); |
2389 | 7.23k | const auto second = digits[1] + digits[0] * 10u; |
2390 | 7.23k | if (second > 59u) |
2391 | 7.23k | set_error_and_return_default("expected second between 0 and 59 (inclusive), saw "sv, second); |
2392 | 7.23k | time.second = static_cast<decltype(time.second)>(second); |
2393 | | |
2394 | | // '.' (early-exiting is allowed; fractional is optional) |
2395 | 7.23k | if (is_eof() || is_value_terminator(*cp) || (part_of_datetime && is_match(*cp, U'+', U'-', U'Z', U'z'))) |
2396 | 3.57k | return time; |
2397 | 3.66k | if (*cp != U'.') |
2398 | 3.66k | set_error_and_return_default("expected '.', saw '"sv, to_sv(*cp), "'"sv); |
2399 | 3.62k | advance_and_return_if_error_or_eof({}); |
2400 | | |
2401 | | // "FFFFFFFFF" |
2402 | 3.62k | size_t digit_count = consume_variable_length_digit_sequence(digits, max_digits); |
2403 | 3.62k | if (!digit_count) |
2404 | 22 | { |
2405 | 22 | set_error_and_return_if_eof({}); |
2406 | 22 | set_error_and_return_default("expected fractional digits, saw '"sv, to_sv(*cp), "'"sv); |
2407 | 22 | } |
2408 | 3.60k | else if (!is_eof()) |
2409 | 3.56k | { |
2410 | 3.56k | if (digit_count == max_digits && is_decimal_digit(*cp)) |
2411 | 3.56k | set_error_and_return_default("fractional component exceeds maximum precision of "sv, max_digits); |
2412 | 3.56k | else if (!part_of_datetime && !is_value_terminator(*cp)) |
2413 | 6 | set_error_and_return_default("expected value-terminator, saw '"sv, to_sv(*cp), "'"sv); |
2414 | 3.56k | } |
2415 | 3.59k | uint32_t value = 0u; |
2416 | 3.59k | uint32_t place = 1u; |
2417 | 17.9k | for (auto i = impl::min<size_t>(digit_count, 9u); i-- > 0u;) |
2418 | 14.3k | { |
2419 | 14.3k | value += digits[i] * place; |
2420 | 14.3k | place *= 10u; |
2421 | 14.3k | } |
2422 | 21.4k | for (auto i = digit_count; i < 9u; i++) // implicit zeros |
2423 | 17.8k | value *= 10u; |
2424 | 3.59k | time.nanosecond = value; |
2425 | 3.59k | return time; |
2426 | 3.62k | } |
2427 | | |
2428 | | TOML_NODISCARD |
2429 | | TOML_NEVER_INLINE |
2430 | | date_time parse_date_time() |
2431 | 5.04k | { |
2432 | 5.04k | return_if_error({}); |
2433 | 5.04k | assert_not_eof(); |
2434 | 5.04k | TOML_ASSERT_ASSUME(is_decimal_digit(*cp)); |
2435 | 5.04k | push_parse_scope("date-time"sv); |
2436 | | |
2437 | | // "YYYY-MM-DD" |
2438 | 5.04k | auto date = parse_date(true); |
2439 | 5.04k | set_error_and_return_if_eof({}); |
2440 | | |
2441 | | // ' ', 'T' or 't' |
2442 | 5.04k | if (!is_match(*cp, U' ', U'T', U't')) |
2443 | 5.04k | set_error_and_return_default("expected space, 'T' or 't', saw '"sv, to_sv(*cp), "'"sv); |
2444 | 5.03k | advance_and_return_if_error_or_eof({}); |
2445 | | |
2446 | | // "HH:MM:SS.FFFFFFFFF" |
2447 | 5.03k | auto time = parse_time(true); |
2448 | 5.03k | return_if_error({}); |
2449 | | |
2450 | | // no offset |
2451 | 5.03k | if (is_eof() || is_value_terminator(*cp)) |
2452 | 1.42k | return { date, time }; |
2453 | | |
2454 | | // zero offset ('Z' or 'z') |
2455 | 3.60k | time_offset offset{}; |
2456 | 3.60k | if (is_match(*cp, U'Z', U'z')) |
2457 | 1.44k | advance_and_return_if_error({}); |
2458 | | |
2459 | | // explicit offset ("+/-HH:MM") |
2460 | 2.16k | else if (is_match(*cp, U'+', U'-')) |
2461 | 2.03k | { |
2462 | 2.03k | push_parse_scope("date-time offset"sv); |
2463 | | |
2464 | | // sign |
2465 | 2.03k | int sign = *cp == U'-' ? -1 : 1; |
2466 | 2.03k | advance_and_return_if_error_or_eof({}); |
2467 | | |
2468 | | // "HH" |
2469 | 2.03k | int digits[2]; |
2470 | 2.03k | if (!consume_digit_sequence(digits, 2u)) |
2471 | 2.03k | set_error_and_return_default("expected 2-digit hour, saw '"sv, to_sv(cp), "'"sv); |
2472 | 2.01k | const auto hour = digits[1] + digits[0] * 10; |
2473 | 2.01k | if (hour > 23) |
2474 | 2.01k | set_error_and_return_default("expected hour between 0 and 23 (inclusive), saw "sv, hour); |
2475 | 2.00k | set_error_and_return_if_eof({}); |
2476 | | |
2477 | | // ':' |
2478 | 2.00k | if (*cp != U':') |
2479 | 2.00k | set_error_and_return_default("expected ':', saw '"sv, to_sv(*cp), "'"sv); |
2480 | 1.99k | advance_and_return_if_error_or_eof({}); |
2481 | | |
2482 | | // "MM" |
2483 | 1.99k | if (!consume_digit_sequence(digits, 2u)) |
2484 | 1.99k | set_error_and_return_default("expected 2-digit minute, saw '"sv, to_sv(cp), "'"sv); |
2485 | 1.97k | const auto minute = digits[1] + digits[0] * 10; |
2486 | 1.97k | if (minute > 59) |
2487 | 1.97k | set_error_and_return_default("expected minute between 0 and 59 (inclusive), saw "sv, minute); |
2488 | 1.97k | offset.minutes = static_cast<decltype(offset.minutes)>((hour * 60 + minute) * sign); |
2489 | 1.97k | } |
2490 | | |
2491 | 3.54k | if (!is_eof() && !is_value_terminator(*cp)) |
2492 | 3.54k | set_error_and_return_default("expected value-terminator, saw '"sv, to_sv(*cp), "'"sv); |
2493 | | |
2494 | 3.52k | return { date, time, offset }; |
2495 | 3.54k | } |
2496 | | |
// Forward declaration only; the definition is not part of this section.
// parse_value_known_prefixes() calls this when the current code point is '['.
2497 | | TOML_NODISCARD |
2498 | | node_ptr parse_array(); |
2499 | | |
// Forward declaration only; the definition is not part of this section.
// parse_value_known_prefixes() calls this when the current code point is '{'.
2500 | | TOML_NODISCARD |
2501 | | node_ptr parse_inline_table(); |
2502 | | |
2503 | | TOML_NODISCARD |
2504 | | node_ptr parse_value_known_prefixes() |
2505 | 862k | { |
2506 | 862k | return_if_error({}); |
2507 | 862k | assert_not_eof(); |
2508 | 862k | TOML_ASSERT_ASSUME(!is_control_character(*cp)); |
2509 | 862k | TOML_ASSERT_ASSUME(*cp != U'_'); |
2510 | | |
2511 | 862k | switch (cp->value) |
2512 | 862k | { |
2513 | | // arrays |
2514 | 6.24k | case U'[': return parse_array(); |
2515 | | |
2516 | | // inline tables |
2517 | 7.01k | case U'{': return parse_inline_table(); |
2518 | | |
2519 | | // floats beginning with '.' |
2520 | 13 | case U'.': return node_ptr{ new value{ parse_float() } }; |
2521 | | |
2522 | | // strings |
2523 | 2.72k | case U'"': [[fallthrough]]; |
2524 | 7.33k | case U'\'': return node_ptr{ new value{ parse_string().value } }; |
2525 | | |
2526 | 841k | default: |
2527 | 841k | { |
2528 | 841k | const auto cp_upper = static_cast<uint_least32_t>(cp->value) & ~0x20u; |
2529 | | |
2530 | | // bools |
2531 | 841k | if (cp_upper == 70u || cp_upper == 84u) // F or T |
2532 | 1.81k | return node_ptr{ new value{ parse_boolean() } }; |
2533 | | |
2534 | | // inf/nan |
2535 | 839k | else if (cp_upper == 73u || cp_upper == 78u) // I or N |
2536 | 1.27k | return node_ptr{ new value{ parse_inf_or_nan() } }; |
2537 | | |
2538 | 838k | else |
2539 | 838k | return nullptr; |
2540 | 841k | } |
2541 | 862k | } |
2542 | 862k | TOML_UNREACHABLE; |
2543 | 862k | } |
2544 | | |
2545 | | TOML_NODISCARD |
2546 | | node_ptr parse_value() |
2547 | 862k | { |
2548 | 862k | return_if_error({}); |
2549 | 862k | assert_not_eof(); |
2550 | 862k | TOML_ASSERT_ASSUME(!is_value_terminator(*cp)); |
2551 | 862k | push_parse_scope("value"sv); |
2552 | | |
2553 | 862k | const depth_counter_scope depth_counter{ nested_values }; |
2554 | 862k | if TOML_UNLIKELY(nested_values > max_nested_values) |
2555 | 862k | set_error_and_return_default("exceeded maximum nested value depth of "sv, |
2556 | 862k | max_nested_values, |
2557 | 862k | " (TOML_MAX_NESTED_VALUES)"sv); |
2558 | | |
2559 | | // check if it begins with some control character |
2560 | | // (note that this will also fail for whitespace but we're assuming we've |
2561 | | // called consume_leading_whitespace() before calling parse_value()) |
2562 | 862k | if TOML_UNLIKELY(is_control_character(*cp)) |
2563 | 862k | set_error_and_return_default("unexpected control character"sv); |
2564 | | |
2565 | | // underscores at the beginning |
2566 | 862k | else if (*cp == U'_') |
2567 | 1 | set_error_and_return_default("values may not begin with underscores"sv); |
2568 | | |
2569 | 862k | const auto begin_pos = cp->position; |
2570 | 862k | node_ptr val; |
2571 | | |
2572 | 862k | do |
2573 | 862k | { |
2574 | 862k | TOML_ASSERT_ASSUME(!is_control_character(*cp)); |
2575 | 862k | TOML_ASSERT_ASSUME(*cp != U'_'); |
2576 | | |
2577 | | // detect the value type and parse accordingly, |
2578 | | // starting with value types that can be detected |
2579 | | // unambiguously from just one character. |
2580 | | |
2581 | 862k | val = parse_value_known_prefixes(); |
2582 | 862k | return_if_error({}); |
2583 | 862k | if (val) |
2584 | 17.0k | break; |
2585 | | |
2586 | | // value types from here down require more than one character to unambiguously identify |
2587 | | // so scan ahead and collect a set of value 'traits'. |
2588 | 844k | enum TOML_CLOSED_FLAGS_ENUM value_traits : int |
2589 | 844k | { |
2590 | 844k | has_nothing = 0, |
2591 | 844k | has_digits = 1, |
2592 | 844k | has_b = 1 << 1, // as second char only (0b) |
2593 | 844k | has_e = 1 << 2, // only float exponents |
2594 | 844k | has_o = 1 << 3, // as second char only (0o) |
2595 | 844k | has_p = 1 << 4, // only hexfloat exponents |
2596 | 844k | has_t = 1 << 5, |
2597 | 844k | has_x = 1 << 6, // as second or third char only (0x, -0x, +0x) |
2598 | 844k | has_z = 1 << 7, |
2599 | 844k | has_colon = 1 << 8, |
2600 | 844k | has_plus = 1 << 9, |
2601 | 844k | has_minus = 1 << 10, |
2602 | 844k | has_dot = 1 << 11, |
2603 | 844k | begins_sign = 1 << 12, |
2604 | 844k | begins_digit = 1 << 13, |
2605 | 844k | begins_zero = 1 << 14, |
2606 | | |
2607 | 844k | signs_msk = has_plus | has_minus, |
2608 | 844k | bdigit_msk = has_digits | begins_digit, |
2609 | 844k | bzero_msk = bdigit_msk | begins_zero, |
2610 | 844k | }; |
2611 | 844k | value_traits traits = has_nothing; |
2612 | 968k | const auto has_any = [&](auto t) noexcept { return (traits & t) != has_nothing; };auto toml::v3::impl::impl_ex::parser::parse_value()::{lambda(auto:1)#1}::operator()<toml::v3::impl::impl_ex::parser::parse_value()::value_traits>(toml::v3::impl::impl_ex::parser::parse_value()::value_traits) constLine | Count | Source | 2612 | 935k | const auto has_any = [&](auto t) noexcept { return (traits & t) != has_nothing; }; |
auto toml::v3::impl::impl_ex::parser::parse_value()::{lambda(auto:1)#1}::operator()<int>(int) constLine | Count | Source | 2612 | 33.1k | const auto has_any = [&](auto t) noexcept { return (traits & t) != has_nothing; }; |
|
2613 | 844k | const auto has_none = [&](auto t) noexcept { return (traits & t) == has_nothing; }; |
2614 | 2.00M | const auto add_trait = [&](auto t) noexcept { traits = static_cast<value_traits>(traits | t); }; |
2615 | | |
2616 | | // examine the first character to get the 'begins with' traits |
2617 | | // (good fail-fast opportunity; all the remaining types begin with numeric digits or signs) |
2618 | 844k | if (is_decimal_digit(*cp)) |
2619 | 831k | { |
2620 | 831k | add_trait(begins_digit); |
2621 | 831k | if (*cp == U'0') |
2622 | 16.3k | add_trait(begins_zero); |
2623 | 831k | } |
2624 | 13.7k | else if (is_match(*cp, U'+', U'-')) |
2625 | 6.95k | add_trait(begins_sign); |
2626 | 6.76k | else |
2627 | 6.76k | break; |
2628 | | |
2629 | | // scan the rest of the value to determine the remaining traits |
2630 | 838k | char32_t chars[utf8_buffered_reader::max_history_length]; |
2631 | 838k | size_t char_count = {}, advance_count = {}; |
2632 | 838k | bool eof_while_scanning = false; |
2633 | 838k | const auto scan = [&]() noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) |
2634 | 840k | { |
2635 | 840k | if (is_eof()) |
2636 | 1 | return; |
2637 | 840k | TOML_ASSERT_ASSUME(!is_value_terminator(*cp)); |
2638 | | |
2639 | 840k | do |
2640 | 1.16M | { |
2641 | 1.16M | if (const auto c = **cp; c != U'_') |
2642 | 1.15M | { |
2643 | 1.15M | chars[char_count++] = c; |
2644 | | |
2645 | 1.15M | if (is_decimal_digit(c)) |
2646 | 1.08M | add_trait(has_digits); |
2647 | 65.4k | else if (is_ascii_letter(c)) |
2648 | 13.5k | { |
2649 | 13.5k | TOML_ASSERT_ASSUME((c >= U'a' && c <= U'z') || (c >= U'A' && c <= U'Z')); |
2650 | 13.5k | switch (static_cast<char32_t>(c | 32u)) |
2651 | 13.5k | { |
2652 | 1.51k | case U'b': |
2653 | 1.51k | if (char_count == 2u && has_any(begins_zero)) |
2654 | 1.02k | add_trait(has_b); |
2655 | 1.51k | break; |
2656 | | |
2657 | 1.77k | case U'e': |
2658 | 1.77k | if (char_count > 1u |
2659 | 1.77k | && has_none(has_b | has_o | has_p | has_t | has_x | has_z | has_colon) |
2660 | 1.58k | && (has_none(has_plus | has_minus) || has_any(begins_sign))) |
2661 | 1.41k | add_trait(has_e); |
2662 | 1.77k | break; |
2663 | | |
2664 | 1.13k | case U'o': |
2665 | 1.13k | if (char_count == 2u && has_any(begins_zero)) |
2666 | 1.01k | add_trait(has_o); |
2667 | 1.13k | break; |
2668 | | |
2669 | 151 | case U'p': |
2670 | 151 | if (has_any(has_x)) |
2671 | 75 | add_trait(has_p); |
2672 | 151 | break; |
2673 | | |
2674 | 1.69k | case U'x': |
2675 | 1.69k | if ((char_count == 2u && has_any(begins_zero)) |
2676 | 268 | || (char_count == 3u && has_any(begins_sign) && chars[1] == U'0')) |
2677 | 1.42k | add_trait(has_x); |
2678 | 1.69k | break; |
2679 | | |
2680 | 2.67k | case U't': add_trait(has_t); break; |
2681 | 1.67k | case U'z': add_trait(has_z); break; |
2682 | 13.5k | } |
2683 | 13.5k | } |
2684 | 51.9k | else if (c <= U':') |
2685 | 50.8k | { |
2686 | 50.8k | TOML_ASSERT_ASSUME(c < U'0' || c > U'9'); |
2687 | 50.8k | switch (c) |
2688 | 50.8k | { |
2689 | 3.59k | case U'+': add_trait(has_plus); break; |
2690 | 22.8k | case U'-': add_trait(has_minus); break; |
2691 | 6.99k | case U'.': add_trait(has_dot); break; |
2692 | 17.0k | case U':': add_trait(has_colon); break; |
2693 | 50.8k | } |
2694 | 50.8k | } |
2695 | 1.15M | } |
2696 | | |
2697 | 1.16M | advance_and_return_if_error(); |
2698 | 1.16M | advance_count++; |
2699 | 1.16M | eof_while_scanning = is_eof(); |
2700 | 1.16M | } |
2701 | 1.16M | while (advance_count < (utf8_buffered_reader::max_history_length - 1u) && !is_eof() |
2702 | 1.16M | && !is_value_terminator(*cp)); |
2703 | 840k | }; |
2704 | 838k | scan(); |
2705 | 838k | return_if_error({}); |
2706 | | |
2707 | | // force further scanning if this could have been a date-time with a space instead of a T |
2708 | 838k | if (char_count == 10u // |
2709 | 6.47k | && (traits | begins_zero) == (bzero_msk | has_minus) // |
2710 | 5.41k | && chars[4] == U'-' // |
2711 | 5.39k | && chars[7] == U'-' // |
2712 | 5.39k | && !is_eof() // |
2713 | 5.38k | && *cp == U' ') |
2714 | 3.55k | { |
2715 | 3.55k | const auto pre_advance_count = advance_count; |
2716 | 3.55k | const auto pre_scan_traits = traits; |
2717 | 3.55k | chars[char_count++] = *cp; |
2718 | 3.55k | add_trait(has_t); |
2719 | | |
2720 | 3.55k | const auto backpedal = [&]() noexcept |
2721 | 3.55k | { |
2722 | 1.05k | go_back(advance_count - pre_advance_count); |
2723 | 1.05k | advance_count = pre_advance_count; |
2724 | 1.05k | traits = pre_scan_traits; |
2725 | 1.05k | char_count = 10u; |
2726 | 1.05k | }; |
2727 | | |
2728 | 3.55k | advance_and_return_if_error({}); |
2729 | 3.55k | advance_count++; |
2730 | | |
2731 | 3.55k | if (is_eof() || !is_decimal_digit(*cp)) |
2732 | 1.05k | backpedal(); |
2733 | 2.50k | else |
2734 | 2.50k | { |
2735 | 2.50k | chars[char_count++] = *cp; |
2736 | | |
2737 | 2.50k | advance_and_return_if_error({}); |
2738 | 2.50k | advance_count++; |
2739 | | |
2740 | 2.50k | scan(); |
2741 | 2.50k | return_if_error({}); |
2742 | | |
2743 | 2.50k | if (char_count == 12u) |
2744 | 1 | backpedal(); |
2745 | 2.50k | } |
2746 | 3.55k | } |
2747 | | |
2748 | | // set the reader back to where we started |
2749 | 838k | go_back(advance_count); |
2750 | | |
2751 | | // if after scanning ahead we still only have one value character, |
2752 | | // the only valid value type is an integer. |
2753 | 838k | if (char_count == 1u) |
2754 | 805k | { |
2755 | 805k | if (has_any(begins_digit)) |
2756 | 805k | { |
2757 | 805k | val.reset(new value{ static_cast<int64_t>(chars[0] - U'0') }); |
2758 | 805k | advance(); // skip the digit |
2759 | 805k | break; |
2760 | 805k | } |
2761 | | |
2762 | | // anything else would be ambiguous. |
2763 | 6 | else |
2764 | 805k | set_error_and_return_default(eof_while_scanning ? "encountered end-of-file"sv |
2765 | 805k | : "could not determine value type"sv); |
2766 | 805k | } |
2767 | | |
2768 | | // now things that can be identified from two or more characters |
2769 | 33.1k | return_if_error({}); |
2770 | 33.1k | TOML_ASSERT_ASSUME(char_count >= 2u); |
2771 | | |
2772 | | // do some 'fuzzy matching' where there's no ambiguity, since that allows the specific |
2773 | | // typed parse functions to take over and show better diagnostics if there's an issue |
2774 | | // (as opposed to the fallback "could not determine type" message) |
2775 | 33.1k | if (has_any(has_p)) |
2776 | 9 | val.reset(new value{ parse_hex_float() }); |
2777 | 33.1k | else if (has_any(has_x | has_o | has_b)) |
2778 | 3.45k | { |
2779 | 3.45k | int64_t i; |
2780 | 3.45k | value_flags flags; |
2781 | 3.45k | if (has_any(has_x)) |
2782 | 1.42k | { |
2783 | 1.42k | i = parse_integer<16>(); |
2784 | 1.42k | flags = value_flags::format_as_hexadecimal; |
2785 | 1.42k | } |
2786 | 2.03k | else if (has_any(has_o)) |
2787 | 1.01k | { |
2788 | 1.01k | i = parse_integer<8>(); |
2789 | 1.01k | flags = value_flags::format_as_octal; |
2790 | 1.01k | } |
2791 | 1.02k | else // has_b |
2792 | 1.02k | { |
2793 | 1.02k | i = parse_integer<2>(); |
2794 | 1.02k | flags = value_flags::format_as_binary; |
2795 | 1.02k | } |
2796 | 3.45k | return_if_error({}); |
2797 | | |
2798 | 3.45k | val.reset(new value{ i }); |
2799 | 3.45k | val->ref_cast<int64_t>().flags(flags); |
2800 | 3.45k | } |
2801 | 29.6k | else if (has_any(has_e) || (has_any(begins_digit) && chars[1] == U'.')) |
2802 | 2.78k | val.reset(new value{ parse_float() }); |
2803 | 26.8k | else if (has_any(begins_sign)) |
2804 | 6.63k | { |
2805 | | // single-digit signed integers |
2806 | 6.63k | if (char_count == 2u && has_any(has_digits)) |
2807 | 2.29k | { |
2808 | 2.29k | val.reset(new value{ static_cast<int64_t>(chars[1] - U'0') * (chars[0] == U'-' ? -1LL : 1LL) }); |
2809 | 2.29k | advance(); // skip the sign |
2810 | 2.29k | advance(); // skip the digit |
2811 | 2.29k | break; |
2812 | 2.29k | } |
2813 | | |
2814 | | // simple signed floats (e.g. +1.0) |
2815 | 4.33k | if (is_decimal_digit(chars[1]) && chars[2] == U'.') |
2816 | 293 | val.reset(new value{ parse_float() }); |
2817 | | |
2818 | | // signed infinity or nan |
2819 | 4.04k | else if (is_match(chars[1], U'i', U'n', U'I', U'N')) |
2820 | 382 | val.reset(new value{ parse_inf_or_nan() }); |
2821 | 4.33k | } |
2822 | | |
2823 | 30.8k | return_if_error({}); |
2824 | 30.8k | if (val) |
2825 | 6.45k | break; |
2826 | | |
2827 | | // match trait masks against what they can match exclusively. |
2828 | | // all correct value parses will come out of this list, so doing this as a switch is likely to |
2829 | | // be a better friend to the optimizer on the success path (failure path can be slow but that |
2830 | | // doesn't matter much). |
2831 | 24.3k | switch (unwrap_enum(traits)) |
2832 | 24.3k | { |
2833 | | // binary integers |
2834 | | // 0b10 |
2835 | 0 | case bzero_msk | has_b: |
2836 | 0 | val.reset(new value{ parse_integer<2>() }); |
2837 | 0 | val->ref_cast<int64_t>().flags(value_flags::format_as_binary); |
2838 | 0 | break; |
2839 | | |
2840 | | // octal integers |
2841 | | // 0o10 |
2842 | 0 | case bzero_msk | has_o: |
2843 | 0 | val.reset(new value{ parse_integer<8>() }); |
2844 | 0 | val->ref_cast<int64_t>().flags(value_flags::format_as_octal); |
2845 | 0 | break; |
2846 | | |
2847 | | // decimal integers |
2848 | | // 00 |
2849 | | // 10 |
2850 | | // +10 |
2851 | | // -10 |
2852 | 30 | case bzero_msk: [[fallthrough]]; |
2853 | 8.93k | case bdigit_msk: [[fallthrough]]; |
2854 | 11.3k | case begins_sign | has_digits | has_minus: [[fallthrough]]; |
2855 | 11.7k | case begins_sign | has_digits | has_plus: |
2856 | 11.7k | { |
2857 | | // if the value was so long we exhausted the history buffer it's reasonable to assume |
2858 | | // there was more and the value's actual type is impossible to identify without making the |
2859 | | // buffer bigger (since it could have actually been a float), so emit an error. |
2860 | | // |
2861 | | // (this will likely only come up during fuzzing and similar scenarios) |
2862 | 11.7k | static constexpr size_t max_numeric_value_length = |
2863 | 11.7k | utf8_buffered_reader::max_history_length - 2u; |
2864 | 11.7k | if TOML_UNLIKELY(!eof_while_scanning && advance_count > max_numeric_value_length) |
2865 | 11.7k | set_error_and_return_default("numeric value too long to identify type - cannot exceed "sv, |
2866 | 11.7k | max_numeric_value_length, |
2867 | 11.7k | " characters"sv); |
2868 | | |
2869 | 11.7k | val.reset(new value{ parse_integer<10>() }); |
2870 | 11.7k | break; |
2871 | 11.7k | } |
2872 | | |
2873 | | // hexadecimal integers |
2874 | | // 0x10 |
2875 | 0 | case bzero_msk | has_x: |
2876 | 0 | val.reset(new value{ parse_integer<16>() }); |
2877 | 0 | val->ref_cast<int64_t>().flags(value_flags::format_as_hexadecimal); |
2878 | 0 | break; |
2879 | | |
2880 | | // decimal floats |
2881 | | // 0e1 |
2882 | | // 0e-1 |
2883 | | // 0e+1 |
2884 | | // 0.0 |
2885 | | // 0.0e1 |
2886 | | // 0.0e-1 |
2887 | | // 0.0e+1 |
2888 | 0 | case bzero_msk | has_e: [[fallthrough]]; |
2889 | 0 | case bzero_msk | has_e | has_minus: [[fallthrough]]; |
2890 | 0 | case bzero_msk | has_e | has_plus: [[fallthrough]]; |
2891 | 6 | case bzero_msk | has_dot: [[fallthrough]]; |
2892 | 6 | case bzero_msk | has_dot | has_e: [[fallthrough]]; |
2893 | 6 | case bzero_msk | has_dot | has_e | has_minus: [[fallthrough]]; |
2894 | 6 | case bzero_msk | has_dot | has_e | has_plus: [[fallthrough]]; |
2895 | | // 1e1 |
2896 | | // 1e-1 |
2897 | | // 1e+1 |
2898 | | // 1.0 |
2899 | | // 1.0e1 |
2900 | | // 1.0e-1 |
2901 | | // 1.0e+1 |
2902 | 6 | case bdigit_msk | has_e: [[fallthrough]]; |
2903 | 6 | case bdigit_msk | has_e | has_minus: [[fallthrough]]; |
2904 | 6 | case bdigit_msk | has_e | has_plus: [[fallthrough]]; |
2905 | 692 | case bdigit_msk | has_dot: [[fallthrough]]; |
2906 | 692 | case bdigit_msk | has_dot | has_e: [[fallthrough]]; |
2907 | 692 | case bdigit_msk | has_dot | has_e | has_minus: [[fallthrough]]; |
2908 | 692 | case bdigit_msk | has_dot | has_e | has_plus: [[fallthrough]]; |
2909 | | // +1e1 |
2910 | | // +1.0 |
2911 | | // +1.0e1 |
2912 | | // +1.0e+1 |
2913 | | // +1.0e-1 |
2914 | | // -1.0e+1 |
2915 | 692 | case begins_sign | has_digits | has_e | has_plus: [[fallthrough]]; |
2916 | 951 | case begins_sign | has_digits | has_dot | has_plus: [[fallthrough]]; |
2917 | 951 | case begins_sign | has_digits | has_dot | has_e | has_plus: [[fallthrough]]; |
2918 | 951 | case begins_sign | has_digits | has_dot | has_e | signs_msk: [[fallthrough]]; |
2919 | | // -1e1 |
2920 | | // -1e+1 |
2921 | | // +1e-1 |
2922 | | // -1.0 |
2923 | | // -1.0e1 |
2924 | | // -1.0e-1 |
2925 | 951 | case begins_sign | has_digits | has_e | has_minus: [[fallthrough]]; |
2926 | 951 | case begins_sign | has_digits | has_e | signs_msk: [[fallthrough]]; |
2927 | 1.44k | case begins_sign | has_digits | has_dot | has_minus: [[fallthrough]]; |
2928 | 1.44k | case begins_sign | has_digits | has_dot | has_e | has_minus: |
2929 | 1.44k | val.reset(new value{ parse_float() }); |
2930 | 1.44k | break; |
2931 | | |
2932 | | // hexadecimal floats |
2933 | | // 0x10p0 |
2934 | | // 0x10p-0 |
2935 | | // 0x10p+0 |
2936 | 0 | case bzero_msk | has_x | has_p: [[fallthrough]]; |
2937 | 0 | case bzero_msk | has_x | has_p | has_minus: [[fallthrough]]; |
2938 | 0 | case bzero_msk | has_x | has_p | has_plus: [[fallthrough]]; |
2939 | | // -0x10p0 |
2940 | | // -0x10p-0 |
2941 | | // +0x10p0 |
2942 | | // +0x10p+0 |
2943 | | // -0x10p+0 |
2944 | | // +0x10p-0 |
2945 | 0 | case begins_sign | has_digits | has_x | has_p | has_minus: [[fallthrough]]; |
2946 | 0 | case begins_sign | has_digits | has_x | has_p | has_plus: [[fallthrough]]; |
2947 | 0 | case begins_sign | has_digits | has_x | has_p | signs_msk: [[fallthrough]]; |
2948 | | // 0x10.1p0 |
2949 | | // 0x10.1p-0 |
2950 | | // 0x10.1p+0 |
2951 | 0 | case bzero_msk | has_x | has_dot | has_p: [[fallthrough]]; |
2952 | 0 | case bzero_msk | has_x | has_dot | has_p | has_minus: [[fallthrough]]; |
2953 | 0 | case bzero_msk | has_x | has_dot | has_p | has_plus: [[fallthrough]]; |
2954 | | // -0x10.1p0 |
2955 | | // -0x10.1p-0 |
2956 | | // +0x10.1p0 |
2957 | | // +0x10.1p+0 |
2958 | | // -0x10.1p+0 |
2959 | | // +0x10.1p-0 |
2960 | 0 | case begins_sign | has_digits | has_x | has_dot | has_p | has_minus: [[fallthrough]]; |
2961 | 0 | case begins_sign | has_digits | has_x | has_dot | has_p | has_plus: [[fallthrough]]; |
2962 | 0 | case begins_sign | has_digits | has_x | has_dot | has_p | signs_msk: |
2963 | 0 | val.reset(new value{ parse_hex_float() }); |
2964 | 0 | break; |
2965 | | |
2966 | | // times |
2967 | | // HH:MM |
2968 | | // HH:MM:SS |
2969 | | // HH:MM:SS.FFFFFF |
2970 | 359 | case bzero_msk | has_colon: [[fallthrough]]; |
2971 | 1.00k | case bzero_msk | has_colon | has_dot: [[fallthrough]]; |
2972 | 1.55k | case bdigit_msk | has_colon: [[fallthrough]]; |
2973 | 2.43k | case bdigit_msk | has_colon | has_dot: val.reset(new value{ parse_time() }); break; |
2974 | | |
2975 | | // local dates |
2976 | | // YYYY-MM-DD |
2977 | 866 | case bzero_msk | has_minus: [[fallthrough]]; |
2978 | 3.04k | case bdigit_msk | has_minus: val.reset(new value{ parse_date() }); break; |
2979 | | |
2980 | | // date-times |
2981 | | // YYYY-MM-DDTHH:MM |
2982 | | // YYYY-MM-DDTHH:MM-HH:MM |
2983 | | // YYYY-MM-DDTHH:MM+HH:MM |
2984 | | // YYYY-MM-DD HH:MM |
2985 | | // YYYY-MM-DD HH:MM-HH:MM |
2986 | | // YYYY-MM-DD HH:MM+HH:MM |
2987 | | // YYYY-MM-DDTHH:MM:SS |
2988 | | // YYYY-MM-DDTHH:MM:SS-HH:MM |
2989 | | // YYYY-MM-DDTHH:MM:SS+HH:MM |
2990 | | // YYYY-MM-DD HH:MM:SS |
2991 | | // YYYY-MM-DD HH:MM:SS-HH:MM |
2992 | | // YYYY-MM-DD HH:MM:SS+HH:MM |
2993 | 396 | case bzero_msk | has_minus | has_colon | has_t: [[fallthrough]]; |
2994 | 756 | case bzero_msk | signs_msk | has_colon | has_t: [[fallthrough]]; |
2995 | 1.76k | case bdigit_msk | has_minus | has_colon | has_t: [[fallthrough]]; |
2996 | 2.34k | case bdigit_msk | signs_msk | has_colon | has_t: [[fallthrough]]; |
2997 | | // YYYY-MM-DDTHH:MM:SS.FFFFFF |
2998 | | // YYYY-MM-DDTHH:MM:SS.FFFFFF-HH:MM |
2999 | | // YYYY-MM-DDTHH:MM:SS.FFFFFF+HH:MM |
3000 | | // YYYY-MM-DD HH:MM:SS.FFFFFF |
3001 | | // YYYY-MM-DD HH:MM:SS.FFFFFF-HH:MM |
3002 | | // YYYY-MM-DD HH:MM:SS.FFFFFF+HH:MM |
3003 | 2.53k | case bzero_msk | has_minus | has_colon | has_dot | has_t: [[fallthrough]]; |
3004 | 2.80k | case bzero_msk | signs_msk | has_colon | has_dot | has_t: [[fallthrough]]; |
3005 | 3.23k | case bdigit_msk | has_minus | has_colon | has_dot | has_t: [[fallthrough]]; |
3006 | 3.56k | case bdigit_msk | signs_msk | has_colon | has_dot | has_t: [[fallthrough]]; |
3007 | | // YYYY-MM-DDTHH:MMZ |
3008 | | // YYYY-MM-DD HH:MMZ |
3009 | | // YYYY-MM-DDTHH:MM:SSZ |
3010 | | // YYYY-MM-DD HH:MM:SSZ |
3011 | | // YYYY-MM-DDTHH:MM:SS.FFFFFFZ |
3012 | | // YYYY-MM-DD HH:MM:SS.FFFFFFZ |
3013 | 3.79k | case bzero_msk | has_minus | has_colon | has_z | has_t: [[fallthrough]]; |
3014 | 4.25k | case bzero_msk | has_minus | has_colon | has_dot | has_z | has_t: [[fallthrough]]; |
3015 | 4.58k | case bdigit_msk | has_minus | has_colon | has_z | has_t: [[fallthrough]]; |
3016 | 5.04k | case bdigit_msk | has_minus | has_colon | has_dot | has_z | has_t: |
3017 | 5.04k | val.reset(new value{ parse_date_time() }); |
3018 | 5.04k | break; |
3019 | 24.3k | } |
3020 | 24.3k | } |
3021 | 862k | while (false); |
3022 | | |
3023 | 860k | if (!val) |
3024 | 270 | { |
3025 | 270 | set_error_at(begin_pos, "could not determine value type"sv); |
3026 | 270 | return_after_error({}); |
3027 | 270 | } |
3028 | | |
3029 | 860k | val->source_ = { begin_pos, current_position(1), reader.source_path() }; |
3030 | 860k | return val; |
3031 | 860k | } |
3032 | | |
3033 | | TOML_NEVER_INLINE |
3034 | | bool parse_key() /* Parses one key (bare, quoted or dotted) into key_buffer, one segment per loop pass; returns true once every segment has been stored. */ |
3035 | 208k | { |
3036 | 208k | return_if_error({}); |
3037 | 208k | assert_not_eof(); |
3038 | 208k | TOML_ASSERT_ASSUME(is_bare_key_character(*cp) || is_string_delimiter(*cp)); |
3039 | 208k | push_parse_scope("key"sv); |
3040 | | |
3041 | 208k | key_buffer.clear(); |
3042 | 208k | recording_whitespace = false; |
3043 | | |
3044 | 1.15M | while (!is_error()) |
3045 | 1.15M | { |
3046 | 1.15M | std::string_view key_segment; |
3047 | 1.15M | const auto key_begin = current_position(); |
3048 | | |
3049 | | // bare (unquoted) key segment |
3050 | 1.15M | if (is_bare_key_character(*cp)) |
3051 | 1.15M | key_segment = parse_bare_key_segment(); |
3052 | | |
3053 | | // "quoted key segment" - read via parse_string(); a multi-line string is rejected just below |
3054 | 4.17k | else if (is_string_delimiter(*cp)) |
3055 | 4.12k | { |
3056 | 4.12k | const auto begin_pos = cp->position; |
3057 | | |
3058 | 4.12k | recording_whitespace = true; |
3059 | 4.12k | parsed_string str = parse_string(); |
3060 | 4.12k | recording_whitespace = false; |
3061 | 4.12k | return_if_error({}); |
3062 | | |
3063 | 4.12k | if (str.was_multi_line) |
3064 | 14 | { |
3065 | 14 | set_error_at(begin_pos, |
3066 | 14 | "multi-line strings are prohibited in "sv, |
3067 | 14 | key_buffer.empty() ? ""sv : "dotted "sv, |
3068 | 14 | "keys"sv); |
3069 | 14 | return_after_error({}); |
3070 | 14 | } |
3071 | 4.10k | else |
3072 | 4.10k | key_segment = str.value; |
3073 | 4.12k | } |
3074 | | |
3075 | | // neither a bare key character nor a string delimiter: not a valid start of a key segment |
3076 | 53 | else |
3077 | 53 | set_error_and_return_default("expected bare key starting character or string delimiter, saw '"sv, |
3078 | 1.15M | to_sv(*cp), |
3079 | 1.15M | "'"sv); |
3080 | | |
3081 | 1.15M | const auto key_end = current_position(); |
3082 | | |
3083 | | // whitespace following the key segment |
3084 | 1.15M | consume_leading_whitespace(); |
3085 | | |
3086 | | // store the segment text together with the positions where it began and ended |
3087 | 1.15M | key_buffer.push_back(key_segment, key_begin, key_end); |
3088 | | |
3089 | 1.15M | if TOML_UNLIKELY(key_buffer.size() > max_dotted_keys_depth) |
3090 | 1.15M | set_error_and_return_default("exceeded maximum dotted keys depth of "sv, |
3091 | 1.15M | max_dotted_keys_depth, |
3092 | 1.15M | " (TOML_MAX_DOTTED_KEYS_DEPTH)"sv); |
3093 | | |
3094 | | // eof, or the next character is not '.': the key is complete |
3095 | 1.15M | if (is_eof() || *cp != U'.') |
3096 | 207k | break; |
3097 | | |
3098 | | // was a dotted key - step over the '.' and any whitespace, then go around again |
3099 | 950k | advance_and_return_if_error_or_eof({}); |
3100 | 950k | consume_leading_whitespace(); |
3101 | 950k | set_error_and_return_if_eof({}); |
3102 | 950k | } |
3103 | 208k | return_if_error({}); |
3104 | | |
3105 | 208k | return true; |
3106 | 208k | } |
3107 | | |
3108 | | TOML_NODISCARD |
3109 | | key make_key(size_t segment_index) const /* Builds a key from segment segment_index of key_buffer; its source_region uses that segment's stored start/end and root.source().path. */ |
3110 | 856k | { |
3111 | 856k | TOML_ASSERT(key_buffer.size() > segment_index); /* the index must refer to a segment that is already in the buffer */ |
3112 | | |
3113 | 856k | return key{ |
3114 | 856k | key_buffer[segment_index], |
3115 | 856k | source_region{ key_buffer.starts[segment_index], key_buffer.ends[segment_index], root.source().path } |
3116 | 856k | }; |
3117 | 856k | } |
3118 | | |
3119 | | TOML_NODISCARD |
3120 | | TOML_NEVER_INLINE |
3121 | | table* parse_table_header() /* Parses a [table] or [[array-of-tables]] header line and returns the table it designates, creating missing parent tables along the way. */ |
3122 | 172k | { |
3123 | 172k | return_if_error({}); |
3124 | 172k | assert_not_eof(); |
3125 | 172k | TOML_ASSERT_ASSUME(*cp == U'['); |
3126 | 172k | push_parse_scope("table header"sv); |
3127 | | |
3128 | 172k | const source_position header_begin_pos = cp->position; |
3129 | 172k | source_position header_end_pos; |
3130 | 172k | bool is_arr = false; |
3131 | | |
3132 | | // parse the header text: brackets, key, closing brackets and the rest of the line |
3133 | 172k | { |
3134 | | // skip first '[' |
3135 | 172k | advance_and_return_if_error_or_eof({}); |
3136 | | |
3137 | | // skip past any whitespace that followed the '[' |
3138 | 172k | const bool had_leading_whitespace = consume_leading_whitespace(); |
3139 | 172k | set_error_and_return_if_eof({}); |
3140 | | |
3141 | | // skip second '[' (if present); it marks an [[array-of-tables]] header |
3142 | 172k | if (*cp == U'[') |
3143 | 153k | { |
3144 | 153k | if (had_leading_whitespace) |
3145 | 153k | set_error_and_return_default( |
3146 | 153k | "[[array-of-table]] brackets must be contiguous (i.e. [ [ this ] ] is prohibited)"sv); |
3147 | | |
3148 | 153k | is_arr = true; |
3149 | 153k | advance_and_return_if_error_or_eof({}); |
3150 | | |
3151 | | // skip past any whitespace that followed the '[' |
3152 | 153k | consume_leading_whitespace(); |
3153 | 153k | set_error_and_return_if_eof({}); |
3154 | 153k | } |
3155 | | |
3156 | | // check for a premature closing ']' |
3157 | 172k | if (*cp == U']') |
3158 | 172k | set_error_and_return_default("tables with blank bare keys are explicitly prohibited"sv); |
3159 | | |
3160 | | // get the actual key (recording is switched on while it is parsed; recording_buffer is quoted in the error messages below) |
3161 | 172k | start_recording(); |
3162 | 172k | parse_key(); |
3163 | 172k | stop_recording(1u); |
3164 | 172k | return_if_error({}); |
3165 | | |
3166 | | // skip past any whitespace that followed the key |
3167 | 172k | consume_leading_whitespace(); |
3168 | 172k | return_if_error({}); |
3169 | 172k | set_error_and_return_if_eof({}); |
3170 | | |
3171 | | // consume the closing ']' (two of them for an [[array-of-tables]] header) |
3172 | 172k | if (*cp != U']') |
3173 | 172k | set_error_and_return_default("expected ']', saw '"sv, to_sv(*cp), "'"sv); |
3174 | 172k | if (is_arr) |
3175 | 153k | { |
3176 | 153k | advance_and_return_if_error_or_eof({}); |
3177 | 153k | if (*cp != U']') |
3178 | 153k | set_error_and_return_default("expected ']', saw '"sv, to_sv(*cp), "'"sv); |
3179 | 153k | } |
3180 | 172k | advance_and_return_if_error({}); |
3181 | 172k | header_end_pos = current_position(1); |
3182 | | |
3183 | | // handle the rest of the line after the header: only whitespace, a comment or a line break may follow |
3184 | 172k | consume_leading_whitespace(); |
3185 | 172k | if (!is_eof() && !consume_comment() && !consume_line_break()) |
3186 | 172k | set_error_and_return_default("expected a comment or whitespace, saw '"sv, to_sv(cp), "'"sv); |
3187 | 172k | } |
3188 | 172k | TOML_ASSERT(!key_buffer.empty()); |
3189 | | |
3190 | | // walk every key segment except the last: each parent must be a table/table array, or is created implicitly as a table. |
3191 | 172k | table* parent = &root; |
3192 | 488k | for (size_t i = 0, e = key_buffer.size() - 1u; i < e; i++) |
3193 | 316k | { |
3194 | 316k | const std::string_view segment = key_buffer[i]; |
3195 | 316k | auto pit = parent->lower_bound(segment); |
3196 | | |
3197 | | // parent already existed |
3198 | 316k | if (pit != parent->end() && pit->first == segment) |
3199 | 262k | { |
3200 | 262k | node& p = pit->second; |
3201 | | |
3202 | 262k | if (auto tbl = p.as_table()) |
3203 | 113k | { |
3204 | | // adding to closed inline tables is illegal (open ones are listed in open_inline_tables) |
3205 | 113k | if (tbl->is_inline() && !impl::find(open_inline_tables.begin(), open_inline_tables.end(), tbl)) |
3206 | 113k | set_error_and_return_default("cannot insert '"sv, |
3207 | 113k | to_sv(recording_buffer), |
3208 | 113k | "' into existing inline table"sv); |
3209 | | |
3210 | 113k | parent = tbl; |
3211 | 113k | } |
3212 | 149k | else if (auto arr = p.as_array(); arr && impl::find(table_arrays.begin(), table_arrays.end(), arr)) |
3213 | 149k | { |
3214 | | // table arrays are a special case; |
3215 | | // the spec dictates we select the most recently declared element in the array. |
3216 | 149k | TOML_ASSERT(!arr->empty()); |
3217 | 149k | TOML_ASSERT(arr->back().is_table()); |
3218 | 149k | parent = &arr->back().ref_cast<table>(); |
3219 | 149k | } |
3220 | 16 | else |
3221 | 16 | { |
3222 | 16 | if (!is_arr && p.type() == node_type::table) /* NOTE(review): p.as_table() was already null on this path, so this condition looks unreachable - confirm */ |
3223 | 16 | set_error_and_return_default("cannot redefine existing table '"sv, |
3224 | 16 | to_sv(recording_buffer), |
3225 | 16 | "'"sv); |
3226 | 16 | else |
3227 | 16 | set_error_and_return_default("cannot redefine existing "sv, |
3228 | 16 | to_sv(p.type()), |
3229 | 16 | " '"sv, |
3230 | 16 | to_sv(recording_buffer), |
3231 | 16 | "' as "sv, |
3232 | 16 | is_arr ? "array-of-tables"sv : "table"sv); |
3233 | 16 | } |
3234 | 262k | } |
3235 | | |
3236 | | // need to create a new implicit table (remembered in implicit_tables so a later header may promote it) |
3237 | 53.9k | else |
3238 | 53.9k | { |
3239 | 53.9k | pit = parent->emplace_hint<table>(pit, make_key(i)); |
3240 | 53.9k | table& p = pit->second.ref_cast<table>(); |
3241 | 53.9k | p.source_ = { header_begin_pos, header_end_pos, reader.source_path() }; |
3242 | | |
3243 | 53.9k | implicit_tables.push_back(&p); |
3244 | 53.9k | parent = &p; |
3245 | 53.9k | } |
3246 | 316k | } |
3247 | | |
3248 | 172k | const auto last_segment = key_buffer.back(); |
3249 | 172k | auto it = parent->lower_bound(last_segment); |
3250 | | |
3251 | | // if there was already a matching node some sanity checking is necessary; |
3252 | | // this is ok if we're making an array and the existing element is already an array (new element) |
3253 | | // or if we're making a table and the existing element is an implicitly-created table (promote it), |
3254 | | // otherwise this is a redefinition error. |
3255 | 172k | if (it != parent->end() && it->first == last_segment) |
3256 | 30.6k | { |
3257 | 30.6k | node& matching_node = it->second; |
3258 | 30.6k | if (auto arr = matching_node.as_array(); |
3259 | 30.6k | is_arr && arr && impl::find(table_arrays.begin(), table_arrays.end(), arr)) |
3260 | 18.1k | { |
3261 | 18.1k | table& tbl = arr->emplace_back<table>(); |
3262 | 18.1k | tbl.source_ = { header_begin_pos, header_end_pos, reader.source_path() }; |
3263 | 18.1k | return &tbl; |
3264 | 18.1k | } |
3265 | | |
3266 | 12.4k | else if (auto tbl = matching_node.as_table(); !is_arr && tbl && !implicit_tables.empty()) |
3267 | 12.4k | { |
3268 | 12.4k | if (auto found = impl::find(implicit_tables.begin(), implicit_tables.end(), tbl); found) |
3269 | 12.4k | { |
3270 | 12.4k | bool ok = true; /* promotion is allowed only while every child is a table or an array of tables */ |
3271 | 12.4k | if (!tbl->empty()) |
3272 | 12.4k | { |
3273 | 12.4k | for (auto& [_, child] : *tbl) |
3274 | 121k | { |
3275 | 121k | if (!child.is_table() && !child.is_array_of_tables()) |
3276 | 11 | { |
3277 | 11 | ok = false; |
3278 | 11 | break; |
3279 | 11 | } |
3280 | 121k | } |
3281 | 12.4k | } |
3282 | | |
3283 | 12.4k | if (ok) |
3284 | 12.4k | { |
3285 | 12.4k | implicit_tables.erase(implicit_tables.cbegin() + (found - implicit_tables.data())); |
3286 | 12.4k | tbl->source_.begin = header_begin_pos; |
3287 | 12.4k | tbl->source_.end = header_end_pos; |
3288 | 12.4k | return tbl; |
3289 | 12.4k | } |
3290 | 12.4k | } |
3291 | 12.4k | } |
3292 | | |
3293 | | // if we get here it's a redefinition error. |
3294 | 34 | if (!is_arr && matching_node.type() == node_type::table) |
3295 | 20 | { |
3296 | 20 | set_error_at(header_begin_pos, |
3297 | 20 | "cannot redefine existing table '"sv, |
3298 | 20 | to_sv(recording_buffer), |
3299 | 20 | "'"sv); |
3300 | 20 | return_after_error({}); |
3301 | 20 | } |
3302 | 14 | else |
3303 | 14 | { |
3304 | 14 | set_error_at(header_begin_pos, |
3305 | 14 | "cannot redefine existing "sv, |
3306 | 14 | to_sv(matching_node.type()), |
3307 | 14 | " '"sv, |
3308 | 14 | to_sv(recording_buffer), |
3309 | 14 | "' as "sv, |
3310 | 14 | is_arr ? "array-of-tables"sv : "table"sv); |
3311 | 14 | return_after_error({}); |
3312 | 14 | } |
3313 | 34 | } |
3314 | | |
3315 | | // there was no matching node, so we can freely instantiate a new table/table array. |
3316 | 141k | else |
3317 | 141k | { |
3318 | 141k | auto last_key = make_key(key_buffer.size() - 1u); |
3319 | | |
3320 | | // if it's an array we need to make the array and its first table element, |
3321 | | // set the starting regions, and return the table element |
3322 | 141k | if (is_arr) |
3323 | 135k | { |
3324 | 135k | it = parent->emplace_hint<array>(it, std::move(last_key)); |
3325 | 135k | array& tbl_arr = it->second.ref_cast<array>(); |
3326 | 135k | table_arrays.push_back(&tbl_arr); |
3327 | 135k | tbl_arr.source_ = { header_begin_pos, header_end_pos, reader.source_path() }; |
3328 | | |
3329 | 135k | table& tbl = tbl_arr.emplace_back<table>(); |
3330 | 135k | tbl.source_ = { header_begin_pos, header_end_pos, reader.source_path() }; |
3331 | 135k | return &tbl; |
3332 | 135k | } |
3333 | | |
3334 | | // otherwise we're just making a table |
3335 | 5.81k | else |
3336 | 5.81k | { |
3337 | 5.81k | it = parent->emplace_hint<table>(it, std::move(last_key)); |
3338 | 5.81k | table& tbl = it->second.ref_cast<table>(); |
3339 | 5.81k | tbl.source_ = { header_begin_pos, header_end_pos, reader.source_path() }; |
3340 | 5.81k | return &tbl; |
3341 | 5.81k | } |
3342 | 141k | } |
3343 | 172k | } |
3344 | | |
3345 | | TOML_NEVER_INLINE |
3346 | | bool parse_key_value_pair_and_insert(table* tbl) /* Parses `key = value` at the current position and inserts the value into tbl, or into a sub-table of tbl when the key is dotted; returns true on success. */ |
3347 | 35.9k | { |
3348 | 35.9k | return_if_error({}); |
3349 | 35.9k | assert_not_eof(); |
3350 | 35.9k | TOML_ASSERT_ASSUME(is_string_delimiter(*cp) || is_bare_key_character(*cp)); |
3351 | 35.9k | push_parse_scope("key-value pair"sv); |
3352 | | |
3353 | | // read the key into the key buffer |
3354 | 35.9k | start_recording(); |
3355 | 35.9k | parse_key(); |
3356 | 35.9k | stop_recording(1u); |
3357 | 35.9k | return_if_error({}); |
3358 | 35.9k | TOML_ASSERT(key_buffer.size() >= 1u); |
3359 | | |
3360 | | // skip past any whitespace that followed the key |
3361 | 35.9k | consume_leading_whitespace(); |
3362 | 35.9k | set_error_and_return_if_eof({}); |
3363 | | |
3364 | | // '=' must come between the key and the value |
3365 | 35.8k | if (*cp != U'=') |
3366 | 35.8k | set_error_and_return_default("expected '=', saw '"sv, to_sv(*cp), "'"sv); |
3367 | 35.6k | advance_and_return_if_error_or_eof({}); |
3368 | | |
3369 | | // skip past any whitespace that followed the '=' |
3370 | 35.6k | consume_leading_whitespace(); |
3371 | 35.6k | return_if_error({}); |
3372 | 35.6k | set_error_and_return_if_eof({}); |
3373 | | |
3374 | | // check that the next character could actually be a value |
3375 | 35.6k | if (is_value_terminator(*cp)) |
3376 | 35.6k | set_error_and_return_default("expected value, saw '"sv, to_sv(*cp), "'"sv); |
3377 | | |
3378 | | // if it's a dotted kvp we need to spawn the parent sub-tables if necessary, |
3379 | | // and set the target table to the second-to-last one in the chain |
3380 | 35.6k | if (key_buffer.size() > 1u) |
3381 | 3.99k | { |
3382 | 631k | for (size_t i = 0; i < key_buffer.size() - 1u; i++) |
3383 | 627k | { |
3384 | 627k | const std::string_view segment = key_buffer[i]; |
3385 | 627k | auto pit = tbl->lower_bound(segment); |
3386 | | |
3387 | | // parent already existed |
3388 | 627k | if (pit != tbl->end() && pit->first == segment) |
3389 | 1.38k | { |
3390 | 1.38k | table* p = pit->second.as_table(); |
3391 | | |
3392 | | // redefinition: an existing parent may only be reused if it is a table listed in dotted_key_tables or implicit_tables |
3393 | 1.38k | if TOML_UNLIKELY(!p |
3394 | 1.38k | || !(impl::find(dotted_key_tables.begin(), dotted_key_tables.end(), p) |
3395 | 1.38k | || impl::find(implicit_tables.begin(), implicit_tables.end(), p))) |
3396 | 11 | { |
3397 | 11 | set_error_at(key_buffer.starts[i], |
3398 | 11 | "cannot redefine existing "sv, |
3399 | 11 | to_sv(pit->second.type()), |
3400 | 11 | " as dotted key-value pair"sv); |
3401 | 11 | return_after_error({}); |
3402 | 11 | } |
3403 | | |
3404 | 1.37k | tbl = p; |
3405 | 1.37k | } |
3406 | | |
3407 | | // need to create a new table for this segment (remembered in dotted_key_tables) |
3408 | 626k | else |
3409 | 626k | { |
3410 | 626k | pit = tbl->emplace_hint<table>(pit, make_key(i)); |
3411 | 626k | table& p = pit->second.ref_cast<table>(); |
3412 | 626k | p.source_ = pit->first.source(); |
3413 | | |
3414 | 626k | dotted_key_tables.push_back(&p); |
3415 | 626k | tbl = &p; |
3416 | 626k | } |
3417 | 627k | } |
3418 | 3.99k | } |
3419 | | |
3420 | | // ensure this isn't a redefinition of a key already present in the target table |
3421 | 35.6k | const std::string_view last_segment = key_buffer.back(); |
3422 | 35.6k | auto it = tbl->lower_bound(last_segment); |
3423 | 35.6k | if (it != tbl->end() && it->first == last_segment) |
3424 | 11 | { |
3425 | 11 | set_error("cannot redefine existing "sv, |
3426 | 11 | to_sv(it->second.type()), |
3427 | 11 | " '"sv, |
3428 | 11 | to_sv(recording_buffer), |
3429 | 11 | "'"sv); |
3430 | 11 | return_after_error({}); |
3431 | 11 | } |
3432 | | |
3433 | | // create the key first since the key buffer will likely get overwritten during value parsing (inline |
3434 | | // tables) |
3435 | 35.6k | auto last_key = make_key(key_buffer.size() - 1u); |
3436 | | |
3437 | | // now we can actually parse the value |
3438 | 35.6k | node_ptr val = parse_value(); |
3439 | 35.6k | return_if_error({}); |
3440 | | |
3441 | 35.6k | tbl->emplace_hint<node_ptr>(it, std::move(last_key), std::move(val)); /* `it` is the lower_bound result from the redefinition check above, reused as the insertion hint */ |
3442 | 35.6k | return true; |
3443 | 35.6k | } |
3444 | | |
3445 | | void parse_document() /* Top-level loop: consumes whitespace, line breaks, comments, table headers and key-value pairs until EOF, then records the end position on the root table. */ |
3446 | 7.01k | { |
3447 | 7.01k | assert_not_error(); |
3448 | 7.01k | assert_not_eof(); |
3449 | 7.01k | push_parse_scope("root table"sv); |
3450 | | |
3451 | 7.01k | table* current_table = &root; |
3452 | | |
3453 | 7.01k | do |
3454 | 209k | { |
3455 | 209k | return_if_error(); |
3456 | | |
3457 | | // leading whitespace, line endings, comments |
3458 | 209k | if (consume_leading_whitespace() || consume_line_break() || consume_comment()) |
3459 | 8.44k | continue; |
3460 | 201k | return_if_error(); |
3461 | | |
3462 | | // [tables] |
3463 | | // [[table array]] |
3464 | 201k | if (*cp == U'[') |
3465 | 172k | current_table = parse_table_header(); |
3466 | | |
3467 | | // bare_keys |
3468 | | // dotted.keys |
3469 | | // "quoted keys" |
3470 | 29.0k | else if (is_bare_key_character(*cp) || is_string_delimiter(*cp)) |
3471 | 28.8k | { |
3472 | 28.8k | push_parse_scope("key-value pair"sv); |
3473 | | |
3474 | 28.8k | parse_key_value_pair_and_insert(current_table); |
3475 | | |
3476 | | // handle the rest of the line after the kvp |
3477 | | // (this is not done in parse_key_value_pair_and_insert() because that is also used for inline tables) |
3478 | 28.8k | consume_leading_whitespace(); |
3479 | 28.8k | return_if_error(); |
3480 | 28.8k | if (!is_eof() && !consume_comment() && !consume_line_break()) |
3481 | 21 | set_error("expected a comment or whitespace, saw '"sv, to_sv(cp), "'"sv); |
3482 | 28.8k | } |
3483 | | |
3484 | 221 | else // any other character cannot begin a table header or a key-value pair |
3485 | 221 | set_error("expected keys, tables, whitespace or comments, saw '"sv, to_sv(cp), "'"sv); |
3486 | 201k | } |
3487 | 209k | while (!is_eof()); |
3488 | | |
3489 | 7.01k | auto eof_pos = current_position(1); |
3490 | 7.01k | root.source_.end = eof_pos; |
3491 | 7.01k | if (current_table && current_table != &root && current_table->source_.end <= current_table->source_.begin) /* only patch the last table when its recorded end does not lie after its begin */ |
3492 | 0 | current_table->source_.end = eof_pos; |
3493 | 7.01k | } |
3494 | | |
3495 | | static void update_region_ends(node& nde) noexcept /* Recursively walks tables and arrays and widens each array's source_.end to the furthest end found among its elements. */ |
3496 | 1.70M | { |
3497 | 1.70M | const auto type = nde.type(); |
3498 | 1.70M | if (type > node_type::array) /* node types ordered after node_type::array have nothing to recurse into here */ |
3499 | 807k | return; |
3500 | | |
3501 | 893k | if (type == node_type::table) |
3502 | 755k | { |
3503 | 755k | auto& tbl = nde.ref_cast<table>(); |
3504 | 755k | if (tbl.is_inline()) // inline tables (and all their inline descendants) are already correctly |
3505 | | // terminated |
3506 | 1.17k | return; |
3507 | | |
3508 | 754k | auto end = nde.source_.end; /* NOTE(review): unlike the array branch below, this branch never writes `end` back to nde.source_.end - confirm that is intended */ |
3509 | 754k | for (auto&& [k, v] : tbl) |
3510 | 757k | { |
3511 | 757k | TOML_UNUSED(k); |
3512 | 757k | update_region_ends(v); |
3513 | 757k | if (end < v.source_.end) |
3514 | 572k | end = v.source_.end; |
3515 | 757k | } |
3516 | 754k | } |
3517 | 137k | else // arrays: recurse into every element, then store the furthest end seen |
3518 | 137k | { |
3519 | 137k | auto& arr = nde.ref_cast<array>(); |
3520 | 137k | auto end = nde.source_.end; |
3521 | 137k | for (auto&& v : arr) |
3522 | 939k | { |
3523 | 939k | update_region_ends(v); |
3524 | 939k | if (end < v.source_.end) |
3525 | 17.9k | end = v.source_.end; |
3526 | 939k | } |
3527 | 137k | nde.source_.end = end; |
3528 | 137k | } |
3529 | 893k | } |
3530 | | |
3531 | | public: |
3532 | | parser(utf8_reader_interface&& reader_) // stores the reader, reads the first code point and, if there is one, parses the whole document |
3533 | 7.13k | : reader{ reader_ } |
3534 | 7.13k | { |
3535 | 7.13k | root.source_ = { prev_pos, prev_pos, reader.source_path() }; |
3536 | | |
3537 | 7.13k | if (!reader.peek_eof()) |
3538 | 7.08k | { |
3539 | 7.08k | cp = reader.read_next(); |
3540 | | |
3541 | | #if !TOML_EXCEPTIONS /* without exceptions a reader failure is moved into err; note the early return skips update_region_ends() */ |
3542 | | if (reader.error()) |
3543 | | { |
3544 | | err = std::move(reader.error()); |
3545 | | return; |
3546 | | } |
3547 | | #endif |
3548 | | |
3549 | 7.08k | if (cp) |
3550 | 7.01k | parse_document(); |
3551 | 7.08k | } |
3552 | | |
3553 | 7.13k | update_region_ends(root); |
3554 | 7.13k | } |
3555 | | |
3556 | | TOML_NODISCARD |
3557 | | operator parse_result() && noexcept /* Rvalue-only conversion that moves the parsed root table (or, without exceptions, the stored error) into a parse_result. */ |
3558 | 3.45k | { |
3559 | 3.45k | #if TOML_EXCEPTIONS |
3560 | | |
3561 | 3.45k | return { std::move(root) }; |
3562 | | |
3563 | | #else /* !TOML_EXCEPTIONS: the result carries either the error or the table */ |
3564 | | |
3565 | | if (err) |
3566 | | return parse_result{ *std::move(err) }; |
3567 | | else |
3568 | | return parse_result{ std::move(root) }; |
3569 | | |
3570 | | #endif |
3571 | 3.45k | } |
3572 | | }; |
3573 | | |
3574 | | TOML_EXTERNAL_LINKAGE |
3575 | | node_ptr parser::parse_array() /* Parses a `[ ... ]` array starting at the opening bracket and returns it as a node_ptr; `prev` tracks whether the last item seen was a comma or a value. */ |
3576 | 6.24k | { |
3577 | 6.24k | return_if_error({}); |
3578 | 6.24k | assert_not_eof(); |
3579 | 6.24k | TOML_ASSERT_ASSUME(*cp == U'['); |
3580 | 6.24k | push_parse_scope("array"sv); |
3581 | | |
3582 | | // skip opening '[' |
3583 | 6.24k | advance_and_return_if_error_or_eof({}); |
3584 | | |
3585 | 6.22k | node_ptr arr_ptr{ new array{} }; |
3586 | 6.22k | array& arr = arr_ptr->ref_cast<array>(); |
3587 | 6.22k | enum class TOML_CLOSED_ENUM parse_type : int |
3588 | 6.22k | { |
3589 | 6.22k | none, |
3590 | 6.22k | comma, |
3591 | 6.22k | val |
3592 | 6.22k | }; |
3593 | 6.22k | parse_type prev = parse_type::none; |
3594 | | |
3595 | 1.65M | while (!is_error()) |
3596 | 1.65M | { |
3597 | 1.68M | while (consume_leading_whitespace() || consume_line_break() || consume_comment()) /* whitespace, line breaks and comments may appear between elements */ |
3598 | 29.3k | continue; |
3599 | 1.65M | set_error_and_return_if_eof({}); |
3600 | | |
3601 | | // commas - only legal directly after a value |
3602 | 1.65M | if (*cp == U',') |
3603 | 821k | { |
3604 | 821k | if (prev == parse_type::val) |
3605 | 821k | { |
3606 | 821k | prev = parse_type::comma; |
3607 | 821k | advance_and_return_if_error_or_eof({}); |
3608 | 821k | continue; |
3609 | 821k | } |
3610 | 1 | set_error_and_return_default("expected value or closing ']', saw comma"sv); |
3611 | 1 | } |
3612 | | |
3613 | | // closing ']' ends the array (accepted after a value, after a comma, or straight after the opening bracket) |
3614 | 830k | else if (*cp == U']') |
3615 | 2.85k | { |
3616 | 2.85k | advance_and_return_if_error({}); |
3617 | 2.85k | break; |
3618 | 2.85k | } |
3619 | | |
3620 | | // must be a value; two values in a row without a comma is an error |
3621 | 827k | else |
3622 | 827k | { |
3623 | 827k | if (prev == parse_type::val) |
3624 | 30 | { |
3625 | 30 | set_error_and_return_default("expected comma or closing ']', saw '"sv, to_sv(*cp), "'"sv); |
3626 | 0 | continue; |
3627 | 30 | } |
3628 | 827k | prev = parse_type::val; |
3629 | | |
3630 | 827k | auto val = parse_value(); |
3631 | 827k | return_if_error({}); |
3632 | | |
3633 | 827k | if (!arr.capacity()) /* nothing allocated yet: reserve room for four elements up front */ |
3634 | 2.61k | arr.reserve(4u); |
3635 | 827k | arr.emplace_back<node_ptr>(std::move(val)); |
3636 | 827k | } |
3637 | 1.65M | } |
3638 | | |
3639 | 6.05k | return_if_error({}); |
3640 | 6.05k | return arr_ptr; |
3641 | 6.22k | } |
3642 | | |
3643 | | TOML_EXTERNAL_LINKAGE |
3644 | | node_ptr parser::parse_inline_table() /* Parses a `{ ... }` inline table starting at the opening brace and returns it as a node_ptr; `prev` tracks whether the last item seen was a comma or a key-value pair. */ |
3645 | 7.01k | { |
3646 | 7.01k | return_if_error({}); |
3647 | 7.01k | assert_not_eof(); |
3648 | 7.01k | TOML_ASSERT_ASSUME(*cp == U'{'); |
3649 | 7.01k | push_parse_scope("inline table"sv); |
3650 | | |
3651 | | // skip opening '{' |
3652 | 7.01k | advance_and_return_if_error_or_eof({}); |
3653 | | |
3654 | 6.99k | node_ptr tbl_ptr{ new table{} }; |
3655 | 6.99k | table& tbl = tbl_ptr->ref_cast<table>(); |
3656 | 6.99k | tbl.is_inline(true); |
3657 | 6.99k | table_vector_scope table_scope{ open_inline_tables, tbl }; /* presumably registers tbl in open_inline_tables for the lifetime of this scope - confirm against table_vector_scope */ |
3658 | | |
3659 | 6.99k | enum class TOML_CLOSED_ENUM parse_type : int |
3660 | 6.99k | { |
3661 | 6.99k | none, |
3662 | 6.99k | comma, |
3663 | 6.99k | kvp |
3664 | 6.99k | }; |
3665 | 6.99k | parse_type prev = parse_type::none; |
3666 | 15.1k | while (!is_error()) |
3667 | 12.1k | { |
3668 | | if constexpr (TOML_LANG_UNRELEASED) // toml/issues/516 (newlines/trailing commas in inline tables) |
3669 | | { |
3670 | | while (consume_leading_whitespace() || consume_line_break() || consume_comment()) |
3671 | | continue; |
3672 | | } |
3673 | | else |
3674 | 12.1k | { |
3675 | 14.6k | while (consume_leading_whitespace()) |
3676 | 2.55k | continue; |
3677 | 12.1k | } |
3678 | 12.1k | return_if_error({}); |
3679 | 12.1k | set_error_and_return_if_eof({}); |
3680 | | |
3681 | | // commas - only legal directly after a key-value pair |
3682 | 12.1k | if (*cp == U',') |
3683 | 998 | { |
3684 | 998 | if (prev == parse_type::kvp) |
3685 | 997 | { |
3686 | 997 | prev = parse_type::comma; |
3687 | 997 | advance_and_return_if_error_or_eof({}); |
3688 | 997 | } |
3689 | 1 | else |
3690 | 998 | set_error_and_return_default("expected key-value pair or closing '}', saw comma"sv); |
3691 | 998 | } |
3692 | | |
3693 | | // closing '}' ends the table; a comma directly before it is an error unless TOML_LANG_UNRELEASED is set |
3694 | 11.1k | else if (*cp == U'}') |
3695 | 3.96k | { |
3696 | | if constexpr (!TOML_LANG_UNRELEASED) // toml/issues/516 (newlines/trailing commas in inline tables) |
3697 | 3.96k | { |
3698 | 3.96k | if (prev == parse_type::comma) |
3699 | 0 | { |
3700 | 0 | set_error_and_return_default("expected key-value pair, saw closing '}' (dangling comma)"sv); |
3701 | 0 | continue; |
3702 | 0 | } |
3703 | 3.96k | } |
3704 | 3.96k | advance_and_return_if_error({}); |
3705 | 3.96k | break; |
3706 | 3.96k | } |
3707 | | |
3708 | | // key-value pair; two pairs in a row without a comma is an error |
3709 | 7.14k | else if (is_string_delimiter(*cp) || is_bare_key_character(*cp)) |
3710 | 7.11k | { |
3711 | 7.11k | if (prev == parse_type::kvp) |
3712 | 7.11k | set_error_and_return_default("expected comma or closing '}', saw '"sv, to_sv(*cp), "'"sv); |
3713 | 7.11k | else |
3714 | 7.11k | { |
3715 | 7.11k | prev = parse_type::kvp; |
3716 | 7.11k | parse_key_value_pair_and_insert(&tbl); |
3717 | 7.11k | } |
3718 | 7.11k | } |
3719 | | |
3720 | | /// any other character is not valid inside an inline table |
3721 | 35 | else |
3722 | 35 | set_error_and_return_default("expected key or closing '}', saw '"sv, to_sv(*cp), "'"sv); |
3723 | 12.1k | } |
3724 | | |
3725 | 6.95k | return_if_error({}); |
3726 | 6.95k | return tbl_ptr; |
3727 | 6.99k | } |
3728 | | |
3729 | | TOML_ABI_NAMESPACE_END; // TOML_EXCEPTIONS |
3730 | | } |
3731 | | TOML_IMPL_NAMESPACE_END; |
3732 | | |
3733 | | #undef TOML_RETURNS_BY_THROWING |
3734 | | #undef advance_and_return_if_error |
3735 | | #undef advance_and_return_if_error_or_eof |
3736 | | #undef assert_not_eof |
3737 | | #undef assert_not_error |
3738 | | #undef is_eof |
3739 | | #undef is_error |
3740 | | #undef parse_error_break |
3741 | | #undef push_parse_scope |
3742 | | #undef push_parse_scope_1 |
3743 | | #undef push_parse_scope_2 |
3744 | | #undef return_after_error |
3745 | | #undef return_if_eof |
3746 | | #undef return_if_error |
3747 | | #undef return_if_error_or_eof |
3748 | | #undef set_error_and_return |
3749 | | #undef set_error_and_return_default |
3750 | | #undef set_error_and_return_if_eof |
3751 | | #undef utf8_buffered_reader_error_check |
3752 | | #undef utf8_reader_error |
3753 | | #undef utf8_reader_error_check |
3754 | | #undef utf8_reader_return_after_error |
3755 | | |
3756 | | //#--------------------------------------------------------------------------------------------------------------------- |
3757 | | //# PARSER PUBLIC IMPLEMENTATION |
3758 | | //#--------------------------------------------------------------------------------------------------------------------- |
3759 | | |
3760 | | TOML_ANON_NAMESPACE_START |
3761 | | { |
3762 | | TOML_NODISCARD |
3763 | | TOML_INTERNAL_LINKAGE |
3764 | | parse_result do_parse(utf8_reader_interface && reader) |
3765 | 7.13k | { |
3766 | 7.13k | return impl::parser{ std::move(reader) }; |
3767 | 7.13k | } |
3768 | | |
3769 | | TOML_NODISCARD |
3770 | | TOML_INTERNAL_LINKAGE |
3771 | | parse_result do_parse_file(std::string_view file_path) |
3772 | 0 | { |
3773 | 0 | #if TOML_EXCEPTIONS |
3774 | 0 | #define TOML_PARSE_FILE_ERROR(msg, path) \ |
3775 | 0 | throw parse_error(msg, source_position{}, std::make_shared<const std::string>(std::move(path))) |
3776 | 0 | #else |
3777 | 0 | #define TOML_PARSE_FILE_ERROR(msg, path) \ |
3778 | 0 | return parse_result(parse_error(msg, source_position{}, std::make_shared<const std::string>(std::move(path)))) |
3779 | 0 | #endif |
3780 | 0 |
|
3781 | 0 | std::string file_path_str(file_path); |
3782 | 0 |
|
3783 | 0 | // open file with a custom-sized stack buffer |
3784 | 0 | std::ifstream file; |
3785 | 0 | TOML_OVERALIGNED char file_buffer[sizeof(void*) * 1024u]; |
3786 | 0 | file.rdbuf()->pubsetbuf(file_buffer, sizeof(file_buffer)); |
3787 | 0 | #if TOML_WINDOWS && !(defined(__MINGW32__) || defined(__MINGW64__)) |
3788 | 0 | file.open(impl::widen(file_path_str).c_str(), std::ifstream::in | std::ifstream::binary | std::ifstream::ate); |
3789 | 0 | #else |
3790 | 0 | file.open(file_path_str, std::ifstream::in | std::ifstream::binary | std::ifstream::ate); |
3791 | 0 | #endif |
3792 | 0 | if (!file.is_open()) |
3793 | 0 | TOML_PARSE_FILE_ERROR("File could not be opened for reading", file_path_str); |
3794 | 0 |
|
3795 | 0 | // get size |
3796 | 0 | const auto file_size = file.tellg(); |
3797 | 0 | if (file_size == -1) |
3798 | 0 | TOML_PARSE_FILE_ERROR("Could not determine file size", file_path_str); |
3799 | 0 | file.seekg(0, std::ifstream::beg); |
3800 | 0 |
|
3801 | 0 | // read the whole file into memory first if the file isn't too large |
3802 | 0 | constexpr auto large_file_threshold = 1024 * 1024 * 2; // 2 MB |
3803 | 0 | if (file_size <= large_file_threshold) |
3804 | 0 | { |
3805 | 0 | std::vector<char> file_data; |
3806 | 0 | file_data.resize(static_cast<size_t>(file_size)); |
3807 | 0 | file.read(file_data.data(), static_cast<std::streamsize>(file_size)); |
3808 | 0 | return parse(std::string_view{ file_data.data(), file_data.size() }, std::move(file_path_str)); |
3809 | 0 | } |
3810 | 0 |
|
3811 | 0 | // otherwise parse it using the streams |
3812 | 0 | else |
3813 | 0 | return parse(file, std::move(file_path_str)); |
3814 | 0 |
|
3815 | 0 | #undef TOML_PARSE_FILE_ERROR |
3816 | 0 | } |
3817 | | } |
3818 | | TOML_ANON_NAMESPACE_END; |
3819 | | |
3820 | | TOML_NAMESPACE_START |
3821 | | { |
3822 | | TOML_ABI_NAMESPACE_BOOL(TOML_EXCEPTIONS, ex, noex); |
3823 | | |
3824 | | TOML_EXTERNAL_LINKAGE |
3825 | | parse_result TOML_CALLCONV parse(std::string_view doc, std::string_view source_path) |
3826 | 7.13k | { |
3827 | 7.13k | return TOML_ANON_NAMESPACE::do_parse(TOML_ANON_NAMESPACE::utf8_reader{ doc, source_path }); |
3828 | 7.13k | } |
3829 | | |
3830 | | TOML_EXTERNAL_LINKAGE |
3831 | | parse_result TOML_CALLCONV parse(std::string_view doc, std::string && source_path) |
3832 | 0 | { |
3833 | 0 | return TOML_ANON_NAMESPACE::do_parse(TOML_ANON_NAMESPACE::utf8_reader{ doc, std::move(source_path) }); |
3834 | 0 | } |
3835 | | |
3836 | | TOML_EXTERNAL_LINKAGE |
3837 | | parse_result TOML_CALLCONV parse(std::istream & doc, std::string_view source_path) |
3838 | 0 | { |
3839 | 0 | return TOML_ANON_NAMESPACE::do_parse(TOML_ANON_NAMESPACE::utf8_reader{ doc, source_path }); |
3840 | 0 | } |
3841 | | |
3842 | | TOML_EXTERNAL_LINKAGE |
3843 | | parse_result TOML_CALLCONV parse(std::istream & doc, std::string && source_path) |
3844 | 0 | { |
3845 | 0 | return TOML_ANON_NAMESPACE::do_parse(TOML_ANON_NAMESPACE::utf8_reader{ doc, std::move(source_path) }); |
3846 | 0 | } |
3847 | | |
3848 | | TOML_EXTERNAL_LINKAGE |
3849 | | parse_result TOML_CALLCONV parse_file(std::string_view file_path) |
3850 | 0 | { |
3851 | 0 | return TOML_ANON_NAMESPACE::do_parse_file(file_path); |
3852 | 0 | } |
3853 | | |
3854 | | #if TOML_HAS_CHAR8 |
3855 | | |
3856 | | TOML_EXTERNAL_LINKAGE |
3857 | | parse_result TOML_CALLCONV parse(std::u8string_view doc, std::string_view source_path) |
3858 | | { |
3859 | | return TOML_ANON_NAMESPACE::do_parse(TOML_ANON_NAMESPACE::utf8_reader{ doc, source_path }); |
3860 | | } |
3861 | | |
3862 | | TOML_EXTERNAL_LINKAGE |
3863 | | parse_result TOML_CALLCONV parse(std::u8string_view doc, std::string && source_path) |
3864 | | { |
3865 | | return TOML_ANON_NAMESPACE::do_parse(TOML_ANON_NAMESPACE::utf8_reader{ doc, std::move(source_path) }); |
3866 | | } |
3867 | | |
3868 | | TOML_EXTERNAL_LINKAGE |
3869 | | parse_result TOML_CALLCONV parse_file(std::u8string_view file_path) |
3870 | | { |
3871 | | std::string file_path_str; |
3872 | | file_path_str.resize(file_path.length()); |
3873 | | memcpy(file_path_str.data(), file_path.data(), file_path.length()); |
3874 | | return TOML_ANON_NAMESPACE::do_parse_file(file_path_str); |
3875 | | } |
3876 | | |
3877 | | #endif // TOML_HAS_CHAR8 |
3878 | | |
3879 | | #if TOML_ENABLE_WINDOWS_COMPAT |
3880 | | |
3881 | | TOML_EXTERNAL_LINKAGE |
3882 | | parse_result TOML_CALLCONV parse(std::string_view doc, std::wstring_view source_path) |
3883 | | { |
3884 | | return TOML_ANON_NAMESPACE::do_parse(TOML_ANON_NAMESPACE::utf8_reader{ doc, impl::narrow(source_path) }); |
3885 | | } |
3886 | | |
3887 | | TOML_EXTERNAL_LINKAGE |
3888 | | parse_result TOML_CALLCONV parse(std::istream & doc, std::wstring_view source_path) |
3889 | | { |
3890 | | return TOML_ANON_NAMESPACE::do_parse(TOML_ANON_NAMESPACE::utf8_reader{ doc, impl::narrow(source_path) }); |
3891 | | } |
3892 | | |
3893 | | TOML_EXTERNAL_LINKAGE |
3894 | | parse_result TOML_CALLCONV parse_file(std::wstring_view file_path) |
3895 | | { |
3896 | | return TOML_ANON_NAMESPACE::do_parse_file(impl::narrow(file_path)); |
3897 | | } |
3898 | | |
3899 | | #endif // TOML_ENABLE_WINDOWS_COMPAT |
3900 | | |
3901 | | #if TOML_HAS_CHAR8 && TOML_ENABLE_WINDOWS_COMPAT |
3902 | | |
3903 | | TOML_EXTERNAL_LINKAGE |
3904 | | parse_result TOML_CALLCONV parse(std::u8string_view doc, std::wstring_view source_path) |
3905 | | { |
3906 | | return TOML_ANON_NAMESPACE::do_parse(TOML_ANON_NAMESPACE::utf8_reader{ doc, impl::narrow(source_path) }); |
3907 | | } |
3908 | | |
3909 | | #endif // TOML_HAS_CHAR8 && TOML_ENABLE_WINDOWS_COMPAT |
3910 | | |
3911 | | TOML_ABI_NAMESPACE_END; // TOML_EXCEPTIONS |
3912 | | } |
3913 | | TOML_NAMESPACE_END; |
3914 | | |
3915 | | #undef TOML_OVERALIGNED |
3916 | | #include "header_end.hpp" |
3917 | | #endif // TOML_ENABLE_PARSER |