/src/tomlplusplus/include/toml++/impl/parser.inl
Line | Count | Source |
1 | | //# This file is a part of toml++ and is subject to the the terms of the MIT license. |
2 | | //# Copyright (c) Mark Gillard <mark.gillard@outlook.com.au> |
3 | | //# See https://github.com/marzer/tomlplusplus/blob/master/LICENSE for the full license text. |
4 | | // SPDX-License-Identifier: MIT |
5 | | #pragma once |
6 | | |
7 | | #include "preprocessor.hpp" |
8 | | //# {{ |
9 | | #if !TOML_IMPLEMENTATION |
10 | | #error This is an implementation-only header. |
11 | | #endif |
12 | | //# }} |
13 | | #if TOML_ENABLE_PARSER |
14 | | |
15 | | #include "parser.hpp" |
16 | | #include "std_optional.hpp" |
17 | | #include "source_region.hpp" |
18 | | #include "parse_error.hpp" |
19 | | #include "date_time.hpp" |
20 | | #include "value.hpp" |
21 | | #include "array.hpp" |
22 | | #include "table.hpp" |
23 | | #include "unicode.hpp" |
24 | | TOML_DISABLE_WARNINGS; |
25 | | #include <istream> |
26 | | #include <fstream> |
27 | | #if TOML_INT_CHARCONV || TOML_FLOAT_CHARCONV |
28 | | #include <charconv> |
29 | | #endif |
30 | | #if !TOML_INT_CHARCONV || !TOML_FLOAT_CHARCONV |
31 | | #include <sstream> |
32 | | #endif |
33 | | #if !TOML_INT_CHARCONV |
34 | | #include <iomanip> |
35 | | #endif |
36 | | TOML_ENABLE_WARNINGS; |
37 | | #include "header_start.hpp" |
38 | | |
39 | | //#--------------------------------------------------------------------------------------------------------------------- |
40 | | //# UTF8 STREAMS |
41 | | //#--------------------------------------------------------------------------------------------------------------------- |
42 | | |
43 | | TOML_ANON_NAMESPACE_START |
44 | | { |
45 | | template <typename T> |
46 | | class utf8_byte_stream; |
47 | | |
48 | | TOML_INTERNAL_LINKAGE |
49 | | constexpr auto utf8_byte_order_mark = "\xEF\xBB\xBF"sv; |
50 | | |
51 | | template <typename Char> |
52 | | class utf8_byte_stream<std::basic_string_view<Char>> |
53 | | { |
54 | | static_assert(sizeof(Char) == 1); |
55 | | |
56 | | private: |
57 | | std::basic_string_view<Char> source_; |
58 | | size_t position_ = {}; |
59 | | |
60 | | public: |
61 | | TOML_NODISCARD_CTOR |
62 | | explicit constexpr utf8_byte_stream(std::basic_string_view<Char> sv) noexcept // |
63 | 6.35k | : source_{ sv } |
64 | 6.35k | { |
65 | | // skip bom |
66 | 6.35k | if (source_.length() >= 3u && memcmp(utf8_byte_order_mark.data(), source_.data(), 3u) == 0) |
67 | 23 | position_ += 3u; |
68 | 6.35k | } |
69 | | |
70 | | TOML_CONST_INLINE_GETTER |
71 | | constexpr bool error() const noexcept |
72 | 1.30M | { |
73 | 1.30M | return false; |
74 | 1.30M | } |
75 | | |
76 | | TOML_PURE_INLINE_GETTER |
77 | | constexpr bool eof() const noexcept |
78 | 2.62M | { |
79 | 2.62M | return position_ >= source_.length(); |
80 | 2.62M | } |
81 | | |
82 | | TOML_PURE_INLINE_GETTER |
83 | | explicit constexpr operator bool() const noexcept |
84 | 1.31M | { |
85 | 1.31M | return !eof(); |
86 | 1.31M | } |
87 | | |
88 | | TOML_PURE_INLINE_GETTER |
89 | | constexpr bool peek_eof() const noexcept |
90 | 6.35k | { |
91 | 6.35k | return eof(); |
92 | 6.35k | } |
93 | | |
94 | | TOML_NODISCARD |
95 | | TOML_ATTR(nonnull) |
96 | | size_t operator()(void* dest, size_t num) noexcept |
97 | 1.30M | { |
98 | 1.30M | TOML_ASSERT_ASSUME(!eof()); |
99 | | |
100 | 1.30M | num = impl::min(position_ + num, source_.length()) - position_; |
101 | 1.30M | std::memcpy(dest, source_.data() + position_, num); |
102 | 1.30M | position_ += num; |
103 | 1.30M | return num; |
104 | 1.30M | } |
105 | | }; |
106 | | |
107 | | template <> |
108 | | class utf8_byte_stream<std::istream> |
109 | | { |
110 | | private: |
111 | | std::istream* source_; |
112 | | |
113 | | public: |
114 | | TOML_NODISCARD_CTOR |
115 | | explicit utf8_byte_stream(std::istream& stream) noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) // |
116 | | : source_{ &stream } |
117 | 0 | { |
118 | 0 | if (!*this) // eof, bad |
119 | 0 | return; |
120 | 0 |
|
121 | 0 | const auto initial_pos = source_->tellg(); |
122 | 0 | char bom[3]; |
123 | 0 | source_->read(bom, 3); |
124 | 0 | if (source_->bad() || (source_->gcount() == 3 && memcmp(utf8_byte_order_mark.data(), bom, 3u) == 0)) |
125 | 0 | return; |
126 | 0 |
|
127 | 0 | source_->clear(); |
128 | 0 | source_->seekg(initial_pos, std::istream::beg); |
129 | 0 | } |
130 | | |
131 | | TOML_PURE_INLINE_GETTER |
132 | | bool error() const noexcept |
133 | 0 | { |
134 | 0 | return !!(source_->rdstate() & std::istream::badbit); |
135 | 0 | } |
136 | | |
137 | | TOML_PURE_INLINE_GETTER |
138 | | bool eof() const noexcept |
139 | 0 | { |
140 | 0 | return !!(source_->rdstate() & std::istream::eofbit); |
141 | 0 | } |
142 | | |
143 | | TOML_PURE_INLINE_GETTER |
144 | | explicit operator bool() const noexcept |
145 | 0 | { |
146 | 0 | return !(source_->rdstate() & (std::istream::badbit | std::istream::eofbit)); |
147 | 0 | } |
148 | | |
149 | | TOML_NODISCARD |
150 | | bool peek_eof() const noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) |
151 | 0 | { |
152 | 0 | return eof() || source_->peek() == std::istream::traits_type::eof(); |
153 | 0 | } |
154 | | |
155 | | TOML_NODISCARD |
156 | | TOML_ATTR(nonnull) |
157 | | size_t operator()(void* dest, size_t num) noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) |
158 | 0 | { |
159 | 0 | TOML_ASSERT(*this); |
160 | 0 |
|
161 | 0 | source_->read(static_cast<char*>(dest), static_cast<std::streamsize>(num)); |
162 | 0 | return static_cast<size_t>(source_->gcount()); |
163 | 0 | } |
164 | | }; |
165 | | |
166 | | struct utf8_codepoint |
167 | | { |
168 | | char32_t value; |
169 | | char bytes[4]; |
170 | | size_t count; |
171 | | source_position position; |
172 | | |
173 | | TOML_PURE_INLINE_GETTER |
174 | | constexpr operator const char32_t&() const noexcept |
175 | 143M | { |
176 | 143M | return value; |
177 | 143M | } |
178 | | |
179 | | TOML_PURE_INLINE_GETTER |
180 | | constexpr const char32_t& operator*() const noexcept |
181 | 1.36M | { |
182 | 1.36M | return value; |
183 | 1.36M | } |
184 | | }; |
185 | | static_assert(std::is_trivially_default_constructible_v<utf8_codepoint> && std::is_trivially_copyable_v<utf8_codepoint>); |
186 | | static_assert(std::is_standard_layout_v<utf8_codepoint>); |
187 | | |
188 | | struct TOML_ABSTRACT_INTERFACE utf8_reader_interface |
189 | | { |
190 | | TOML_NODISCARD |
191 | | virtual const source_path_ptr& source_path() const noexcept = 0; |
192 | | |
193 | | TOML_NODISCARD |
194 | | virtual const utf8_codepoint* read_next() noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) = 0; |
195 | | |
196 | | TOML_NODISCARD |
197 | | virtual bool peek_eof() const noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) = 0; |
198 | | |
199 | | #if !TOML_EXCEPTIONS |
200 | | |
201 | | TOML_NODISCARD |
202 | | virtual optional<parse_error>&& error() noexcept = 0; |
203 | | |
204 | | #endif |
205 | | |
206 | 6.35k | virtual ~utf8_reader_interface() noexcept = default; |
207 | | }; |
208 | | |
209 | | #if TOML_EXCEPTIONS |
210 | 102 | #define utf8_reader_error(...) throw parse_error(__VA_ARGS__) |
211 | 0 | #define utf8_reader_return_after_error(...) static_assert(true) |
212 | 41.7M | #define utf8_reader_error_check(...) static_assert(true) |
213 | | #else |
214 | | #define utf8_reader_error(...) err_.emplace(__VA_ARGS__) |
215 | | #define utf8_reader_return_after_error(...) return __VA_ARGS__ |
216 | | #define utf8_reader_error_check(...) \ |
217 | | do \ |
218 | | { \ |
219 | | if TOML_UNLIKELY(err_) \ |
220 | | return __VA_ARGS__; \ |
221 | | } \ |
222 | | while (false) |
223 | | |
224 | | #endif |
225 | | |
226 | | #if defined(__APPLE__) || defined(__MINGW32__) || defined(__MINGW64__) |
227 | | #define TOML_OVERALIGNED |
228 | | #else |
229 | 1.30M | #define TOML_OVERALIGNED alignas(32) |
230 | | #endif |
231 | | |
232 | | template <typename T> |
233 | | class TOML_EMPTY_BASES utf8_reader final : public utf8_reader_interface |
234 | | { |
235 | | private: |
236 | | static constexpr size_t block_capacity = 32; |
237 | | utf8_byte_stream<T> stream_; |
238 | | source_position next_pos_ = { 1, 1 }; |
239 | | |
240 | | impl::utf8_decoder decoder_; |
241 | | struct currently_decoding_t |
242 | | { |
243 | | char bytes[4]; |
244 | | size_t count; |
245 | | } currently_decoding_; |
246 | | |
247 | | struct codepoints_t |
248 | | { |
249 | | TOML_OVERALIGNED utf8_codepoint buffer[block_capacity]; |
250 | | size_t current; |
251 | | size_t count; |
252 | | } codepoints_; |
253 | | |
254 | | source_path_ptr source_path_; |
255 | | |
256 | | #if !TOML_EXCEPTIONS |
257 | | optional<parse_error> err_; |
258 | | #endif |
259 | | |
260 | | bool read_next_block() noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) |
261 | 1.30M | { |
262 | 1.30M | TOML_ASSERT(stream_); |
263 | | |
264 | 1.30M | TOML_OVERALIGNED char raw_bytes[block_capacity]; |
265 | 1.30M | size_t raw_bytes_read; |
266 | | |
267 | | // read the next raw (encoded) block in from the stream |
268 | | if constexpr (noexcept(stream_(raw_bytes, block_capacity)) || !TOML_EXCEPTIONS) |
269 | 1.30M | { |
270 | 1.30M | raw_bytes_read = stream_(raw_bytes, block_capacity); |
271 | | } |
272 | | #if TOML_EXCEPTIONS |
273 | | else |
274 | | { |
275 | | try |
276 | | { |
277 | | raw_bytes_read = stream_(raw_bytes, block_capacity); |
278 | | } |
279 | | catch (const std::exception& exc) |
280 | | { |
281 | | throw parse_error{ exc.what(), next_pos_, source_path_ }; |
282 | | } |
283 | | catch (...) |
284 | | { |
285 | | throw parse_error{ "An unspecified error occurred", next_pos_, source_path_ }; |
286 | | } |
287 | | } |
288 | 1.30M | #endif // TOML_EXCEPTIONS |
289 | | |
290 | | // handle a zero-byte read |
291 | 1.30M | if TOML_UNLIKELY(!raw_bytes_read) |
292 | 0 | { |
293 | 0 | if (stream_.eof()) |
294 | 0 | { |
295 | | // EOF only sets the error state if the decoder wants more input, otherwise |
296 | | // a zero-byte read might have just caused the underlying stream to realize it's exhaused and set |
297 | | // the EOF flag, and that's totally fine |
298 | 0 | if (decoder_.needs_more_input()) |
299 | 0 | utf8_reader_error("Encountered EOF during incomplete utf-8 code point sequence", |
300 | 0 | next_pos_, |
301 | 0 | source_path_); |
302 | 0 | } |
303 | 0 | else |
304 | 0 | { |
305 | 0 | utf8_reader_error("Reading from the underlying stream failed - zero bytes read", |
306 | 0 | next_pos_, |
307 | 0 | source_path_); |
308 | 0 | } |
309 | 0 | return false; |
310 | 0 | } |
311 | | |
312 | 1.30M | TOML_ASSERT_ASSUME(raw_bytes_read); |
313 | 1.30M | std::memset(&codepoints_, 0, sizeof(codepoints_)); |
314 | | |
315 | | // helper for calculating decoded codepoint line+cols |
316 | 1.30M | const auto calc_positions = [&]() noexcept |
317 | 1.30M | { |
318 | 43.0M | for (size_t i = 0; i < codepoints_.count; i++) |
319 | 41.7M | { |
320 | 41.7M | auto& cp = codepoints_.buffer[i]; |
321 | 41.7M | cp.position = next_pos_; |
322 | | |
323 | 41.7M | if (cp == U'\n') |
324 | 1.56M | { |
325 | 1.56M | next_pos_.line++; |
326 | 1.56M | next_pos_.column = source_index{ 1 }; |
327 | 1.56M | } |
328 | 40.1M | else |
329 | 40.1M | next_pos_.column++; |
330 | 41.7M | } |
331 | 1.30M | }; |
332 | | |
333 | | // decide whether we need to use the UTF-8 decoder or if we can treat this block as plain ASCII |
334 | 1.30M | const auto ascii_fast_path = !decoder_.needs_more_input() && impl::is_ascii(raw_bytes, raw_bytes_read); |
335 | | |
336 | | // ASCII fast-path |
337 | 1.30M | if (ascii_fast_path) |
338 | 1.29M | { |
339 | 1.29M | decoder_.reset(); |
340 | 1.29M | currently_decoding_.count = {}; |
341 | | |
342 | 1.29M | codepoints_.count = raw_bytes_read; |
343 | 42.7M | for (size_t i = 0; i < codepoints_.count; i++) |
344 | 41.4M | { |
345 | 41.4M | auto& cp = codepoints_.buffer[i]; |
346 | 41.4M | cp.value = static_cast<char32_t>(raw_bytes[i]); |
347 | 41.4M | cp.bytes[0] = raw_bytes[i]; |
348 | 41.4M | cp.count = 1u; |
349 | 41.4M | } |
350 | 1.29M | } |
351 | | |
352 | | // UTF-8 slow-path |
353 | 10.9k | else |
354 | 10.9k | { |
355 | | // helper for getting precise error location |
356 | 10.9k | const auto error_pos = [&]() noexcept -> const source_position& |
357 | 10.9k | { // |
358 | 102 | return codepoints_.count ? codepoints_.buffer[codepoints_.count - 1u].position : next_pos_; |
359 | 102 | }; |
360 | | |
361 | 331k | for (size_t i = 0; i < raw_bytes_read; i++) |
362 | 320k | { |
363 | 320k | decoder_(static_cast<uint8_t>(raw_bytes[i])); |
364 | 320k | if TOML_UNLIKELY(decoder_.error()) |
365 | 73 | { |
366 | 73 | calc_positions(); |
367 | 73 | utf8_reader_error("Encountered invalid utf-8 sequence", error_pos(), source_path_); |
368 | 0 | utf8_reader_return_after_error(false); |
369 | 0 | } |
370 | | |
371 | 320k | currently_decoding_.bytes[currently_decoding_.count++] = raw_bytes[i]; |
372 | | |
373 | 320k | if (decoder_.has_code_point()) |
374 | 284k | { |
375 | 284k | auto& cp = codepoints_.buffer[codepoints_.count++]; |
376 | | |
377 | 284k | cp.value = decoder_.codepoint; |
378 | 284k | cp.count = currently_decoding_.count; |
379 | 284k | std::memcpy(cp.bytes, currently_decoding_.bytes, currently_decoding_.count); |
380 | 284k | currently_decoding_.count = {}; |
381 | 284k | } |
382 | 36.7k | else if TOML_UNLIKELY(currently_decoding_.count == 4u) |
383 | 0 | { |
384 | 0 | calc_positions(); |
385 | 0 | utf8_reader_error("Encountered overlong utf-8 sequence", error_pos(), source_path_); |
386 | 0 | utf8_reader_return_after_error(false); |
387 | 0 | } |
388 | 320k | } |
389 | 10.9k | if TOML_UNLIKELY(decoder_.needs_more_input() && stream_.eof()) |
390 | 29 | { |
391 | 29 | calc_positions(); |
392 | 29 | utf8_reader_error("Encountered EOF during incomplete utf-8 code point sequence", |
393 | 29 | error_pos(), |
394 | 29 | source_path_); |
395 | 0 | utf8_reader_return_after_error(false); |
396 | 0 | } |
397 | 10.9k | } |
398 | | |
399 | 1.30M | TOML_ASSERT_ASSUME(codepoints_.count); |
400 | 1.30M | calc_positions(); |
401 | | |
402 | | // handle general I/O errors |
403 | | // (down here so the next_pos_ benefits from calc_positions()) |
404 | 1.30M | if TOML_UNLIKELY(stream_.error()) |
405 | 0 | { |
406 | 0 | utf8_reader_error("An I/O error occurred while reading from the underlying stream", |
407 | 0 | next_pos_, |
408 | 0 | source_path_); |
409 | 0 | utf8_reader_return_after_error(false); |
410 | 0 | } |
411 | | |
412 | 1.30M | return true; |
413 | 1.30M | } toml::v3::impl::utf8_reader<std::__1::basic_string_view<char, std::__1::char_traits<char> > >::read_next_block() Line | Count | Source | 261 | 1.30M | { | 262 | 1.30M | TOML_ASSERT(stream_); | 263 | | | 264 | 1.30M | TOML_OVERALIGNED char raw_bytes[block_capacity]; | 265 | 1.30M | size_t raw_bytes_read; | 266 | | | 267 | | // read the next raw (encoded) block in from the stream | 268 | | if constexpr (noexcept(stream_(raw_bytes, block_capacity)) || !TOML_EXCEPTIONS) | 269 | 1.30M | { | 270 | 1.30M | raw_bytes_read = stream_(raw_bytes, block_capacity); | 271 | | } | 272 | | #if TOML_EXCEPTIONS | 273 | | else | 274 | | { | 275 | | try | 276 | | { | 277 | | raw_bytes_read = stream_(raw_bytes, block_capacity); | 278 | | } | 279 | | catch (const std::exception& exc) | 280 | | { | 281 | | throw parse_error{ exc.what(), next_pos_, source_path_ }; | 282 | | } | 283 | | catch (...) | 284 | | { | 285 | | throw parse_error{ "An unspecified error occurred", next_pos_, source_path_ }; | 286 | | } | 287 | | } | 288 | 1.30M | #endif // TOML_EXCEPTIONS | 289 | | | 290 | | // handle a zero-byte read | 291 | 1.30M | if TOML_UNLIKELY(!raw_bytes_read) | 292 | 0 | { | 293 | 0 | if (stream_.eof()) | 294 | 0 | { | 295 | | // EOF only sets the error state if the decoder wants more input, otherwise | 296 | | // a zero-byte read might have just caused the underlying stream to realize it's exhaused and set | 297 | | // the EOF flag, and that's totally fine | 298 | 0 | if (decoder_.needs_more_input()) | 299 | 0 | utf8_reader_error("Encountered EOF during incomplete utf-8 code point sequence", | 300 | 0 | next_pos_, | 301 | 0 | source_path_); | 302 | 0 | } | 303 | 0 | else | 304 | 0 | { | 305 | 0 | utf8_reader_error("Reading from the underlying stream failed - zero bytes read", | 306 | 0 | next_pos_, | 307 | 0 | source_path_); | 308 | 0 | } | 309 | 0 | return false; | 310 | 0 | } | 311 | | | 312 | 1.30M | TOML_ASSERT_ASSUME(raw_bytes_read); | 313 | 1.30M | 
std::memset(&codepoints_, 0, sizeof(codepoints_)); | 314 | | | 315 | | // helper for calculating decoded codepoint line+cols | 316 | 1.30M | const auto calc_positions = [&]() noexcept | 317 | 1.30M | { | 318 | 1.30M | for (size_t i = 0; i < codepoints_.count; i++) | 319 | 1.30M | { | 320 | 1.30M | auto& cp = codepoints_.buffer[i]; | 321 | 1.30M | cp.position = next_pos_; | 322 | | | 323 | 1.30M | if (cp == U'\n') | 324 | 1.30M | { | 325 | 1.30M | next_pos_.line++; | 326 | 1.30M | next_pos_.column = source_index{ 1 }; | 327 | 1.30M | } | 328 | 1.30M | else | 329 | 1.30M | next_pos_.column++; | 330 | 1.30M | } | 331 | 1.30M | }; | 332 | | | 333 | | // decide whether we need to use the UTF-8 decoder or if we can treat this block as plain ASCII | 334 | 1.30M | const auto ascii_fast_path = !decoder_.needs_more_input() && impl::is_ascii(raw_bytes, raw_bytes_read); | 335 | | | 336 | | // ASCII fast-path | 337 | 1.30M | if (ascii_fast_path) | 338 | 1.29M | { | 339 | 1.29M | decoder_.reset(); | 340 | 1.29M | currently_decoding_.count = {}; | 341 | | | 342 | 1.29M | codepoints_.count = raw_bytes_read; | 343 | 42.7M | for (size_t i = 0; i < codepoints_.count; i++) | 344 | 41.4M | { | 345 | 41.4M | auto& cp = codepoints_.buffer[i]; | 346 | 41.4M | cp.value = static_cast<char32_t>(raw_bytes[i]); | 347 | 41.4M | cp.bytes[0] = raw_bytes[i]; | 348 | 41.4M | cp.count = 1u; | 349 | 41.4M | } | 350 | 1.29M | } | 351 | | | 352 | | // UTF-8 slow-path | 353 | 10.9k | else | 354 | 10.9k | { | 355 | | // helper for getting precise error location | 356 | 10.9k | const auto error_pos = [&]() noexcept -> const source_position& | 357 | 10.9k | { // | 358 | 10.9k | return codepoints_.count ? 
codepoints_.buffer[codepoints_.count - 1u].position : next_pos_; | 359 | 10.9k | }; | 360 | | | 361 | 331k | for (size_t i = 0; i < raw_bytes_read; i++) | 362 | 320k | { | 363 | 320k | decoder_(static_cast<uint8_t>(raw_bytes[i])); | 364 | 320k | if TOML_UNLIKELY(decoder_.error()) | 365 | 73 | { | 366 | 73 | calc_positions(); | 367 | 73 | utf8_reader_error("Encountered invalid utf-8 sequence", error_pos(), source_path_); | 368 | 0 | utf8_reader_return_after_error(false); | 369 | 0 | } | 370 | | | 371 | 320k | currently_decoding_.bytes[currently_decoding_.count++] = raw_bytes[i]; | 372 | | | 373 | 320k | if (decoder_.has_code_point()) | 374 | 284k | { | 375 | 284k | auto& cp = codepoints_.buffer[codepoints_.count++]; | 376 | | | 377 | 284k | cp.value = decoder_.codepoint; | 378 | 284k | cp.count = currently_decoding_.count; | 379 | 284k | std::memcpy(cp.bytes, currently_decoding_.bytes, currently_decoding_.count); | 380 | 284k | currently_decoding_.count = {}; | 381 | 284k | } | 382 | 36.7k | else if TOML_UNLIKELY(currently_decoding_.count == 4u) | 383 | 0 | { | 384 | 0 | calc_positions(); | 385 | 0 | utf8_reader_error("Encountered overlong utf-8 sequence", error_pos(), source_path_); | 386 | 0 | utf8_reader_return_after_error(false); | 387 | 0 | } | 388 | 320k | } | 389 | 10.9k | if TOML_UNLIKELY(decoder_.needs_more_input() && stream_.eof()) | 390 | 29 | { | 391 | 29 | calc_positions(); | 392 | 29 | utf8_reader_error("Encountered EOF during incomplete utf-8 code point sequence", | 393 | 29 | error_pos(), | 394 | 29 | source_path_); | 395 | 0 | utf8_reader_return_after_error(false); | 396 | 0 | } | 397 | 10.9k | } | 398 | | | 399 | 1.30M | TOML_ASSERT_ASSUME(codepoints_.count); | 400 | 1.30M | calc_positions(); | 401 | | | 402 | | // handle general I/O errors | 403 | | // (down here so the next_pos_ benefits from calc_positions()) | 404 | 1.30M | if TOML_UNLIKELY(stream_.error()) | 405 | 0 | { | 406 | 0 | utf8_reader_error("An I/O error occurred while reading from 
the underlying stream", | 407 | 0 | next_pos_, | 408 | 0 | source_path_); | 409 | 0 | utf8_reader_return_after_error(false); | 410 | 0 | } | 411 | | | 412 | 1.30M | return true; | 413 | 1.30M | } |
Unexecuted instantiation: toml::v3::impl::utf8_reader<std::__1::basic_istream<char, std::__1::char_traits<char> > >::read_next_block() |
414 | | |
415 | | public: |
416 | | template <typename U, typename String = std::string_view> |
417 | | TOML_NODISCARD_CTOR |
418 | | explicit utf8_reader(U&& source, String&& source_path = {}) noexcept( |
419 | | std::is_nothrow_constructible_v<utf8_byte_stream<T>, U&&>) |
420 | 6.35k | : stream_{ static_cast<U&&>(source) } |
421 | 6.35k | { |
422 | 6.35k | currently_decoding_.count = {}; |
423 | | |
424 | 6.35k | codepoints_.current = {}; |
425 | 6.35k | codepoints_.count = {}; |
426 | | |
427 | 6.35k | if (!source_path.empty()) |
428 | 0 | source_path_ = std::make_shared<const std::string>(static_cast<String&&>(source_path)); |
429 | 6.35k | } |
430 | | |
431 | | TOML_PURE_INLINE_GETTER |
432 | | const source_path_ptr& source_path() const noexcept final |
433 | 1.03M | { |
434 | 1.03M | return source_path_; |
435 | 1.03M | } toml::v3::impl::utf8_reader<std::__1::basic_string_view<char, std::__1::char_traits<char> > >::source_path() const Line | Count | Source | 433 | 1.03M | { | 434 | 1.03M | return source_path_; | 435 | 1.03M | } |
Unexecuted instantiation: toml::v3::impl::utf8_reader<std::__1::basic_istream<char, std::__1::char_traits<char> > >::source_path() const |
436 | | |
437 | | TOML_NODISCARD |
438 | | const utf8_codepoint* read_next() noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) final |
439 | 41.7M | { |
440 | 41.7M | utf8_reader_error_check({}); |
441 | | |
442 | 41.7M | if (codepoints_.current == codepoints_.count) |
443 | 1.31M | { |
444 | 1.31M | if TOML_UNLIKELY(!stream_ || !read_next_block()) |
445 | 5.11k | return nullptr; |
446 | | |
447 | 1.30M | TOML_ASSERT_ASSUME(!codepoints_.current); |
448 | 1.30M | } |
449 | 41.7M | TOML_ASSERT_ASSUME(codepoints_.count); |
450 | 41.7M | TOML_ASSERT_ASSUME(codepoints_.count <= block_capacity); |
451 | 41.7M | TOML_ASSERT_ASSUME(codepoints_.current < codepoints_.count); |
452 | | |
453 | 41.7M | return &codepoints_.buffer[codepoints_.current++]; |
454 | 41.7M | } toml::v3::impl::utf8_reader<std::__1::basic_string_view<char, std::__1::char_traits<char> > >::read_next() Line | Count | Source | 439 | 41.7M | { | 440 | 41.7M | utf8_reader_error_check({}); | 441 | | | 442 | 41.7M | if (codepoints_.current == codepoints_.count) | 443 | 1.31M | { | 444 | 1.31M | if TOML_UNLIKELY(!stream_ || !read_next_block()) | 445 | 5.11k | return nullptr; | 446 | | | 447 | 1.30M | TOML_ASSERT_ASSUME(!codepoints_.current); | 448 | 1.30M | } | 449 | 41.7M | TOML_ASSERT_ASSUME(codepoints_.count); | 450 | 41.7M | TOML_ASSERT_ASSUME(codepoints_.count <= block_capacity); | 451 | 41.7M | TOML_ASSERT_ASSUME(codepoints_.current < codepoints_.count); | 452 | | | 453 | 41.7M | return &codepoints_.buffer[codepoints_.current++]; | 454 | 41.7M | } |
Unexecuted instantiation: toml::v3::impl::utf8_reader<std::__1::basic_istream<char, std::__1::char_traits<char> > >::read_next() |
455 | | |
456 | | TOML_NODISCARD |
457 | | bool peek_eof() const noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) final |
458 | 6.35k | { |
459 | 6.35k | return stream_.peek_eof(); |
460 | 6.35k | } toml::v3::impl::utf8_reader<std::__1::basic_string_view<char, std::__1::char_traits<char> > >::peek_eof() const Line | Count | Source | 458 | 6.35k | { | 459 | 6.35k | return stream_.peek_eof(); | 460 | 6.35k | } |
Unexecuted instantiation: toml::v3::impl::utf8_reader<std::__1::basic_istream<char, std::__1::char_traits<char> > >::peek_eof() const |
461 | | |
462 | | #if !TOML_EXCEPTIONS |
463 | | |
464 | | TOML_NODISCARD |
465 | | optional<parse_error>&& error() noexcept final |
466 | | { |
467 | | return std::move(err_); |
468 | | } |
469 | | |
470 | | #endif |
471 | | }; |
472 | | |
473 | | template <typename Char> |
474 | | utf8_reader(std::basic_string_view<Char>, std::string_view) -> utf8_reader<std::basic_string_view<Char>>; |
475 | | template <typename Char> |
476 | | utf8_reader(std::basic_string_view<Char>, std::string&&) -> utf8_reader<std::basic_string_view<Char>>; |
477 | | template <typename Char> |
478 | | utf8_reader(std::basic_istream<Char>&, std::string_view) -> utf8_reader<std::basic_istream<Char>>; |
479 | | template <typename Char> |
480 | | utf8_reader(std::basic_istream<Char>&, std::string&&) -> utf8_reader<std::basic_istream<Char>>; |
481 | | |
482 | | #if TOML_EXCEPTIONS |
483 | 43.9M | #define utf8_buffered_reader_error_check(...) static_assert(true) |
484 | | #else |
485 | | #define utf8_buffered_reader_error_check(...) \ |
486 | | do \ |
487 | | { \ |
488 | | if TOML_UNLIKELY(reader_.error()) \ |
489 | | return __VA_ARGS__; \ |
490 | | } \ |
491 | | while (false) |
492 | | |
493 | | #endif |
494 | | |
495 | | class TOML_EMPTY_BASES utf8_buffered_reader |
496 | | { |
497 | | public: |
498 | | static constexpr size_t max_history_length = 128; |
499 | | |
500 | | private: |
501 | | static constexpr size_t history_buffer_size = max_history_length - 1; //'head' is stored in the reader |
502 | | utf8_reader_interface& reader_; |
503 | | struct |
504 | | { |
505 | | utf8_codepoint buffer[history_buffer_size]; |
506 | | size_t count, first; |
507 | | } history_ = {}; |
508 | | const utf8_codepoint* head_ = {}; |
509 | | size_t negative_offset_ = {}; |
510 | | |
511 | | public: |
512 | | TOML_NODISCARD_CTOR |
513 | | explicit utf8_buffered_reader(utf8_reader_interface& reader) noexcept // |
514 | 6.35k | : reader_{ reader } |
515 | 6.35k | {} |
516 | | |
517 | | TOML_PURE_INLINE_GETTER |
518 | | const source_path_ptr& source_path() const noexcept |
519 | 1.03M | { |
520 | 1.03M | return reader_.source_path(); |
521 | 1.03M | } |
522 | | |
523 | | TOML_NODISCARD |
524 | | const utf8_codepoint* read_next() noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) |
525 | 43.0M | { |
526 | 43.0M | utf8_buffered_reader_error_check({}); |
527 | | |
528 | 43.0M | if (negative_offset_) |
529 | 1.37M | { |
530 | 1.37M | negative_offset_--; |
531 | | |
532 | | // an entry negative offset of 1 just means "replay the current head" |
533 | 1.37M | if (!negative_offset_) |
534 | 858k | return head_; |
535 | | |
536 | | // otherwise step back into the history buffer |
537 | 516k | else |
538 | 516k | return history_.buffer |
539 | 516k | + ((history_.first + history_.count - negative_offset_) % history_buffer_size); |
540 | 1.37M | } |
541 | 41.7M | else |
542 | 41.7M | { |
543 | | // first character read from stream |
544 | 41.7M | if TOML_UNLIKELY(!history_.count && !head_) |
545 | 6.30k | head_ = reader_.read_next(); |
546 | | |
547 | | // subsequent characters and not eof |
548 | 41.6M | else if (head_) |
549 | 41.6M | { |
550 | 41.6M | if TOML_UNLIKELY(history_.count < history_buffer_size) |
551 | 259k | history_.buffer[history_.count++] = *head_; |
552 | 41.4M | else |
553 | 41.4M | history_.buffer[(history_.first++ + history_buffer_size) % history_buffer_size] = *head_; |
554 | | |
555 | 41.6M | head_ = reader_.read_next(); |
556 | 41.6M | } |
557 | | |
558 | 41.7M | return head_; |
559 | 41.7M | } |
560 | 43.0M | } |
561 | | |
562 | | TOML_NODISCARD |
563 | | const utf8_codepoint* step_back(size_t count) noexcept |
564 | 860k | { |
565 | 860k | utf8_buffered_reader_error_check({}); |
566 | | |
567 | 860k | TOML_ASSERT_ASSUME(history_.count); |
568 | 860k | TOML_ASSERT_ASSUME(negative_offset_ + count <= history_.count); |
569 | | |
570 | 860k | negative_offset_ += count; |
571 | | |
572 | 860k | return negative_offset_ |
573 | 860k | ? history_.buffer + ((history_.first + history_.count - negative_offset_) % history_buffer_size) |
574 | 860k | : head_; |
575 | 860k | } |
576 | | |
577 | | TOML_NODISCARD |
578 | | bool peek_eof() const noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) |
579 | 6.35k | { |
580 | 6.35k | return reader_.peek_eof(); |
581 | 6.35k | } |
582 | | |
583 | | #if !TOML_EXCEPTIONS |
584 | | |
585 | | TOML_NODISCARD |
586 | | optional<parse_error>&& error() noexcept |
587 | | { |
588 | | return reader_.error(); |
589 | | } |
590 | | |
591 | | #endif |
592 | | }; |
593 | | } |
594 | | TOML_ANON_NAMESPACE_END; |
595 | | |
596 | | //#--------------------------------------------------------------------------------------------------------------------- |
597 | | //# PARSER INTERNAL IMPLEMENTATION |
598 | | //#--------------------------------------------------------------------------------------------------------------------- |
599 | | |
600 | | #if TOML_EXCEPTIONS |
601 | | #define TOML_RETURNS_BY_THROWING [[noreturn]] |
602 | | #else |
603 | | #define TOML_RETURNS_BY_THROWING |
604 | | #endif |
605 | | |
606 | | TOML_ANON_NAMESPACE_START |
607 | | { |
608 | | template <typename... T> |
609 | | TOML_CONST_GETTER |
610 | | TOML_INTERNAL_LINKAGE |
611 | | constexpr bool is_match(char32_t codepoint, T... vals) noexcept |
612 | 3.71M | { |
613 | 3.71M | static_assert((std::is_same_v<char32_t, T> && ...)); |
614 | 7.54M | return ((codepoint == vals) || ...); |
615 | 3.71M | } bool toml::v3::impl::is_match<char32_t, char32_t>(char32_t, char32_t, char32_t) Line | Count | Source | 612 | 3.68M | { | 613 | 3.68M | static_assert((std::is_same_v<char32_t, T> && ...)); | 614 | 7.35M | return ((codepoint == vals) || ...); | 615 | 3.68M | } |
bool toml::v3::impl::is_match<char32_t, char32_t, char32_t>(char32_t, char32_t, char32_t, char32_t) Line | Count | Source | 612 | 11.9k | { | 613 | 11.9k | static_assert((std::is_same_v<char32_t, T> && ...)); | 614 | 34.7k | return ((codepoint == vals) || ...); | 615 | 11.9k | } |
bool toml::v3::impl::is_match<char32_t, char32_t, char32_t, char32_t, char32_t>(char32_t, char32_t, char32_t, char32_t, char32_t, char32_t) Line | Count | Source | 612 | 7.34k | { | 613 | 7.34k | static_assert((std::is_same_v<char32_t, T> && ...)); | 614 | 58.6k | return ((codepoint == vals) || ...); | 615 | 7.34k | } |
bool toml::v3::impl::is_match<char32_t, char32_t, char32_t, char32_t>(char32_t, char32_t, char32_t, char32_t, char32_t) Line | Count | Source | 612 | 17.4k | { | 613 | 17.4k | static_assert((std::is_same_v<char32_t, T> && ...)); | 614 | 89.0k | return ((codepoint == vals) || ...); | 615 | 17.4k | } |
bool toml::v3::impl::is_match<char32_t, char32_t, char32_t, char32_t, char32_t, char32_t>(char32_t, char32_t, char32_t, char32_t, char32_t, char32_t, char32_t) Line | Count | Source | 612 | 1.77k | { | 613 | 1.77k | static_assert((std::is_same_v<char32_t, T> && ...)); | 614 | 7.43k | return ((codepoint == vals) || ...); | 615 | 1.77k | } |
|
616 | | |
617 | | template <uint64_t> |
618 | | struct parse_integer_traits; |
619 | | template <> |
620 | | struct parse_integer_traits<2> |
621 | | { |
622 | | static constexpr auto scope_qualifier = "binary integer"sv; |
623 | | static constexpr auto is_digit = impl::is_binary_digit; |
624 | | static constexpr auto is_signed = false; |
625 | | static constexpr auto max_digits = 63; |
626 | | static constexpr auto prefix_codepoint = U'b'; |
627 | | static constexpr auto prefix = "b"sv; |
628 | | static constexpr auto full_prefix = "0b"sv; |
629 | | }; |
630 | | template <> |
631 | | struct parse_integer_traits<8> |
632 | | { |
633 | | static constexpr auto scope_qualifier = "octal integer"sv; |
634 | | static constexpr auto is_digit = impl::is_octal_digit; |
635 | | static constexpr auto is_signed = false; |
636 | | static constexpr auto max_digits = 21; // strlen("777777777777777777777") |
637 | | static constexpr auto prefix_codepoint = U'o'; |
638 | | static constexpr auto prefix = "o"sv; |
639 | | static constexpr auto full_prefix = "0o"sv; |
640 | | }; |
641 | | template <> |
642 | | struct parse_integer_traits<10> |
643 | | { |
644 | | static constexpr auto scope_qualifier = "decimal integer"sv; |
645 | | static constexpr auto is_digit = impl::is_decimal_digit; |
646 | | static constexpr auto is_signed = true; |
647 | | static constexpr auto max_digits = 19; // strlen("9223372036854775807") |
648 | | static constexpr auto full_prefix = ""sv; |
649 | | }; |
650 | | template <> |
651 | | struct parse_integer_traits<16> |
652 | | { |
653 | | static constexpr auto scope_qualifier = "hexadecimal integer"sv; |
654 | | static constexpr auto is_digit = impl::is_hexadecimal_digit; |
655 | | static constexpr auto is_signed = false; |
656 | | static constexpr auto max_digits = 16; // strlen("7FFFFFFFFFFFFFFF") |
657 | | static constexpr auto prefix_codepoint = U'x'; |
658 | | static constexpr auto prefix = "x"sv; |
659 | | static constexpr auto full_prefix = "0x"sv; |
660 | | }; |
661 | | |
662 | | TOML_PURE_GETTER |
663 | | TOML_INTERNAL_LINKAGE |
664 | | std::string_view to_sv(node_type val) noexcept |
665 | 49 | { |
666 | 49 | return impl::node_type_friendly_names[impl::unwrap_enum(val)]; |
667 | 49 | } |
668 | | |
669 | | TOML_PURE_GETTER |
670 | | TOML_INTERNAL_LINKAGE |
671 | | std::string_view to_sv(const std::string& str) noexcept |
672 | 102 | { |
673 | 102 | return std::string_view{ str }; |
674 | 102 | } |
675 | | |
676 | | TOML_CONST_GETTER |
677 | | TOML_INTERNAL_LINKAGE |
678 | | std::string_view to_sv(bool val) noexcept |
679 | 19 | { |
680 | 19 | using namespace std::string_view_literals; |
681 | | |
682 | 19 | return val ? "true"sv : "false"sv; |
683 | 19 | } |
684 | | |
685 | | TOML_PURE_GETTER |
686 | | TOML_INTERNAL_LINKAGE |
687 | | std::string_view to_sv(const utf8_codepoint& cp) noexcept |
688 | 1.27k | { |
689 | 1.27k | if (cp.value <= U'\x1F') |
690 | 166 | return impl::control_char_escapes[cp.value]; |
691 | 1.11k | else if (cp.value == U'\x7F') |
692 | 43 | return "\\u007F"sv; |
693 | 1.07k | else |
694 | 1.07k | return std::string_view{ cp.bytes, cp.count }; |
695 | 1.27k | } |
696 | | |
697 | | TOML_PURE_GETTER |
698 | | TOML_INTERNAL_LINKAGE |
699 | | std::string_view to_sv(const utf8_codepoint* cp) noexcept |
700 | 387 | { |
701 | 387 | if (cp) |
702 | 387 | return to_sv(*cp); |
703 | 0 | return ""sv; |
704 | 387 | } |
705 | | |
706 | | struct escaped_codepoint |
707 | | { |
708 | | const utf8_codepoint& cp; |
709 | | }; |
710 | | |
711 | | template <typename T> |
712 | | TOML_ATTR(nonnull) |
713 | | TOML_INTERNAL_LINKAGE |
714 | | void concatenate(char*& write_pos, char* const buf_end, const T& arg) noexcept |
715 | 14.5k | { |
716 | 14.5k | if TOML_UNLIKELY(write_pos >= buf_end) |
717 | 1 | return; |
718 | | |
719 | 14.5k | using arg_type = impl::remove_cvref<T>; |
720 | | |
721 | | // string views |
722 | | if constexpr (std::is_same_v<arg_type, std::string_view>) |
723 | 14.3k | { |
724 | 14.3k | const auto max_chars = static_cast<size_t>(buf_end - write_pos); |
725 | 14.3k | const auto len = max_chars < arg.length() ? max_chars : arg.length(); |
726 | 14.3k | std::memcpy(write_pos, arg.data(), len); |
727 | 14.3k | write_pos += len; |
728 | | } |
729 | | |
730 | | // doubles |
731 | | else if constexpr (std::is_same_v<arg_type, double>) |
732 | | { |
733 | | #if TOML_FLOAT_CHARCONV |
734 | | const auto result = std::to_chars(write_pos, buf_end, arg); |
735 | | write_pos = result.ptr; |
736 | | #else |
737 | | std::ostringstream ss; |
738 | | ss.imbue(std::locale::classic()); |
739 | | ss.precision(std::numeric_limits<arg_type>::max_digits10); |
740 | | ss << arg; |
741 | | concatenate(write_pos, buf_end, to_sv(std::move(ss).str())); |
742 | | #endif |
743 | | } |
744 | | |
745 | | // 64-bit integers |
746 | | else if constexpr (impl::is_one_of<arg_type, int64_t, uint64_t>) |
747 | 52 | { |
748 | 52 | #if TOML_INT_CHARCONV |
749 | 52 | const auto result = std::to_chars(write_pos, buf_end, arg); |
750 | 52 | write_pos = result.ptr; |
751 | | #else |
752 | | std::ostringstream ss; |
753 | | ss.imbue(std::locale::classic()); |
754 | | using cast_type = std::conditional_t<std::is_signed_v<arg_type>, int64_t, uint64_t>; |
755 | | ss << static_cast<cast_type>(arg); |
756 | | concatenate(write_pos, buf_end, to_sv(std::move(ss).str())); |
757 | | #endif |
758 | | } |
759 | | |
760 | | // escaped_codepoint |
761 | | else if constexpr (std::is_same_v<arg_type, escaped_codepoint>) |
762 | 79 | { |
763 | 79 | if (arg.cp.value <= U'\x7F') |
764 | 18 | concatenate(write_pos, buf_end, to_sv(arg.cp)); |
765 | 61 | else |
766 | 61 | { |
767 | 61 | auto val = static_cast<uint_least32_t>(arg.cp.value); |
768 | 61 | const auto digits = val > 0xFFFFu ? 8u : 4u; |
769 | 61 | constexpr auto mask = uint_least32_t{ 0xFu }; |
770 | 61 | char buf[10] = { '\\', digits > 4 ? 'U' : 'u' }; |
771 | 333 | for (auto i = 2u + digits; i-- > 2u;) |
772 | 272 | { |
773 | 272 | const auto hexdig = val & mask; |
774 | 272 | buf[i] = static_cast<char>(hexdig >= 0xAu ? ('A' + (hexdig - 0xAu)) : ('0' + hexdig)); |
775 | 272 | val >>= 4; |
776 | 272 | } |
777 | 61 | concatenate(write_pos, buf_end, std::string_view{ buf, digits + 2u }); |
778 | 61 | } |
779 | | } |
780 | | |
781 | | // all other floats (fallback - coerce to double) |
782 | | else if constexpr (std::is_floating_point_v<arg_type>) |
783 | | concatenate(write_pos, buf_end, static_cast<double>(arg)); |
784 | | |
785 | | // all other integers (fallback - coerce to (u)int64_t) |
786 | | else if constexpr (std::is_arithmetic_v<arg_type> && std::is_integral_v<arg_type>) |
787 | 35 | { |
788 | 35 | using cast_type = std::conditional_t<std::is_unsigned_v<arg_type>, uint64_t, int64_t>; |
789 | 35 | concatenate(write_pos, buf_end, static_cast<cast_type>(arg)); |
790 | | } |
791 | | |
792 | | else |
793 | | { |
794 | | static_assert( |
795 | | impl::always_false<T>, |
796 | | "concatenate() inputs are limited to std::string_views, integers, floats, and escaped_codepoint"); |
797 | | } |
798 | 14.5k | } void toml::v3::impl::concatenate<std::__1::basic_string_view<char, std::__1::char_traits<char> > >(char*&, char*, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) Line | Count | Source | 715 | 14.3k | { | 716 | 14.3k | if TOML_UNLIKELY(write_pos >= buf_end) | 717 | 1 | return; | 718 | | | 719 | 14.3k | using arg_type = impl::remove_cvref<T>; | 720 | | | 721 | | // string views | 722 | | if constexpr (std::is_same_v<arg_type, std::string_view>) | 723 | 14.3k | { | 724 | 14.3k | const auto max_chars = static_cast<size_t>(buf_end - write_pos); | 725 | 14.3k | const auto len = max_chars < arg.length() ? max_chars : arg.length(); | 726 | 14.3k | std::memcpy(write_pos, arg.data(), len); | 727 | 14.3k | write_pos += len; | 728 | | } | 729 | | | 730 | | // doubles | 731 | | else if constexpr (std::is_same_v<arg_type, double>) | 732 | | { | 733 | | #if TOML_FLOAT_CHARCONV | 734 | | const auto result = std::to_chars(write_pos, buf_end, arg); | 735 | | write_pos = result.ptr; | 736 | | #else | 737 | | std::ostringstream ss; | 738 | | ss.imbue(std::locale::classic()); | 739 | | ss.precision(std::numeric_limits<arg_type>::max_digits10); | 740 | | ss << arg; | 741 | | concatenate(write_pos, buf_end, to_sv(std::move(ss).str())); | 742 | | #endif | 743 | | } | 744 | | | 745 | | // 64-bit integers | 746 | | else if constexpr (impl::is_one_of<arg_type, int64_t, uint64_t>) | 747 | | { | 748 | | #if TOML_INT_CHARCONV | 749 | | const auto result = std::to_chars(write_pos, buf_end, arg); | 750 | | write_pos = result.ptr; | 751 | | #else | 752 | | std::ostringstream ss; | 753 | | ss.imbue(std::locale::classic()); | 754 | | using cast_type = std::conditional_t<std::is_signed_v<arg_type>, int64_t, uint64_t>; | 755 | | ss << static_cast<cast_type>(arg); | 756 | | concatenate(write_pos, buf_end, to_sv(std::move(ss).str())); | 757 | | #endif | 758 | | } | 759 | | | 760 | | // escaped_codepoint | 761 | | else if constexpr (std::is_same_v<arg_type, 
escaped_codepoint>) | 762 | | { | 763 | | if (arg.cp.value <= U'\x7F') | 764 | | concatenate(write_pos, buf_end, to_sv(arg.cp)); | 765 | | else | 766 | | { | 767 | | auto val = static_cast<uint_least32_t>(arg.cp.value); | 768 | | const auto digits = val > 0xFFFFu ? 8u : 4u; | 769 | | constexpr auto mask = uint_least32_t{ 0xFu }; | 770 | | char buf[10] = { '\\', digits > 4 ? 'U' : 'u' }; | 771 | | for (auto i = 2u + digits; i-- > 2u;) | 772 | | { | 773 | | const auto hexdig = val & mask; | 774 | | buf[i] = static_cast<char>(hexdig >= 0xAu ? ('A' + (hexdig - 0xAu)) : ('0' + hexdig)); | 775 | | val >>= 4; | 776 | | } | 777 | | concatenate(write_pos, buf_end, std::string_view{ buf, digits + 2u }); | 778 | | } | 779 | | } | 780 | | | 781 | | // all other floats (fallback - coerce to double) | 782 | | else if constexpr (std::is_floating_point_v<arg_type>) | 783 | | concatenate(write_pos, buf_end, static_cast<double>(arg)); | 784 | | | 785 | | // all other integers (fallback - coerce to (u)int64_t) | 786 | | else if constexpr (std::is_arithmetic_v<arg_type> && std::is_integral_v<arg_type>) | 787 | | { | 788 | | using cast_type = std::conditional_t<std::is_unsigned_v<arg_type>, uint64_t, int64_t>; | 789 | | concatenate(write_pos, buf_end, static_cast<cast_type>(arg)); | 790 | | } | 791 | | | 792 | | else | 793 | | { | 794 | | static_assert( | 795 | | impl::always_false<T>, | 796 | | "concatenate() inputs are limited to std::string_views, integers, floats, and escaped_codepoint"); | 797 | | } | 798 | 14.3k | } |
void toml::v3::impl::concatenate<toml::v3::impl::escaped_codepoint>(char*&, char*, toml::v3::impl::escaped_codepoint const&) Line | Count | Source | 715 | 79 | { | 716 | 79 | if TOML_UNLIKELY(write_pos >= buf_end) | 717 | 0 | return; | 718 | | | 719 | 79 | using arg_type = impl::remove_cvref<T>; | 720 | | | 721 | | // string views | 722 | | if constexpr (std::is_same_v<arg_type, std::string_view>) | 723 | | { | 724 | | const auto max_chars = static_cast<size_t>(buf_end - write_pos); | 725 | | const auto len = max_chars < arg.length() ? max_chars : arg.length(); | 726 | | std::memcpy(write_pos, arg.data(), len); | 727 | | write_pos += len; | 728 | | } | 729 | | | 730 | | // doubles | 731 | | else if constexpr (std::is_same_v<arg_type, double>) | 732 | | { | 733 | | #if TOML_FLOAT_CHARCONV | 734 | | const auto result = std::to_chars(write_pos, buf_end, arg); | 735 | | write_pos = result.ptr; | 736 | | #else | 737 | | std::ostringstream ss; | 738 | | ss.imbue(std::locale::classic()); | 739 | | ss.precision(std::numeric_limits<arg_type>::max_digits10); | 740 | | ss << arg; | 741 | | concatenate(write_pos, buf_end, to_sv(std::move(ss).str())); | 742 | | #endif | 743 | | } | 744 | | | 745 | | // 64-bit integers | 746 | | else if constexpr (impl::is_one_of<arg_type, int64_t, uint64_t>) | 747 | | { | 748 | | #if TOML_INT_CHARCONV | 749 | | const auto result = std::to_chars(write_pos, buf_end, arg); | 750 | | write_pos = result.ptr; | 751 | | #else | 752 | | std::ostringstream ss; | 753 | | ss.imbue(std::locale::classic()); | 754 | | using cast_type = std::conditional_t<std::is_signed_v<arg_type>, int64_t, uint64_t>; | 755 | | ss << static_cast<cast_type>(arg); | 756 | | concatenate(write_pos, buf_end, to_sv(std::move(ss).str())); | 757 | | #endif | 758 | | } | 759 | | | 760 | | // escaped_codepoint | 761 | | else if constexpr (std::is_same_v<arg_type, escaped_codepoint>) | 762 | 79 | { | 763 | 79 | if (arg.cp.value <= U'\x7F') | 764 | 18 | concatenate(write_pos, buf_end, 
to_sv(arg.cp)); | 765 | 61 | else | 766 | 61 | { | 767 | 61 | auto val = static_cast<uint_least32_t>(arg.cp.value); | 768 | 61 | const auto digits = val > 0xFFFFu ? 8u : 4u; | 769 | 61 | constexpr auto mask = uint_least32_t{ 0xFu }; | 770 | 61 | char buf[10] = { '\\', digits > 4 ? 'U' : 'u' }; | 771 | 333 | for (auto i = 2u + digits; i-- > 2u;) | 772 | 272 | { | 773 | 272 | const auto hexdig = val & mask; | 774 | 272 | buf[i] = static_cast<char>(hexdig >= 0xAu ? ('A' + (hexdig - 0xAu)) : ('0' + hexdig)); | 775 | 272 | val >>= 4; | 776 | 272 | } | 777 | 61 | concatenate(write_pos, buf_end, std::string_view{ buf, digits + 2u }); | 778 | 61 | } | 779 | | } | 780 | | | 781 | | // all other floats (fallback - coerce to double) | 782 | | else if constexpr (std::is_floating_point_v<arg_type>) | 783 | | concatenate(write_pos, buf_end, static_cast<double>(arg)); | 784 | | | 785 | | // all other integers (fallback - coerce to (u)int64_t) | 786 | | else if constexpr (std::is_arithmetic_v<arg_type> && std::is_integral_v<arg_type>) | 787 | | { | 788 | | using cast_type = std::conditional_t<std::is_unsigned_v<arg_type>, uint64_t, int64_t>; | 789 | | concatenate(write_pos, buf_end, static_cast<cast_type>(arg)); | 790 | | } | 791 | | | 792 | | else | 793 | | { | 794 | | static_assert( | 795 | | impl::always_false<T>, | 796 | | "concatenate() inputs are limited to std::string_views, integers, floats, and escaped_codepoint"); | 797 | | } | 798 | 79 | } |
void toml::v3::impl::concatenate<unsigned long>(char*&, char*, unsigned long const&) Line | Count | Source | 715 | 45 | { | 716 | 45 | if TOML_UNLIKELY(write_pos >= buf_end) | 717 | 0 | return; | 718 | | | 719 | 45 | using arg_type = impl::remove_cvref<T>; | 720 | | | 721 | | // string views | 722 | | if constexpr (std::is_same_v<arg_type, std::string_view>) | 723 | | { | 724 | | const auto max_chars = static_cast<size_t>(buf_end - write_pos); | 725 | | const auto len = max_chars < arg.length() ? max_chars : arg.length(); | 726 | | std::memcpy(write_pos, arg.data(), len); | 727 | | write_pos += len; | 728 | | } | 729 | | | 730 | | // doubles | 731 | | else if constexpr (std::is_same_v<arg_type, double>) | 732 | | { | 733 | | #if TOML_FLOAT_CHARCONV | 734 | | const auto result = std::to_chars(write_pos, buf_end, arg); | 735 | | write_pos = result.ptr; | 736 | | #else | 737 | | std::ostringstream ss; | 738 | | ss.imbue(std::locale::classic()); | 739 | | ss.precision(std::numeric_limits<arg_type>::max_digits10); | 740 | | ss << arg; | 741 | | concatenate(write_pos, buf_end, to_sv(std::move(ss).str())); | 742 | | #endif | 743 | | } | 744 | | | 745 | | // 64-bit integers | 746 | | else if constexpr (impl::is_one_of<arg_type, int64_t, uint64_t>) | 747 | 45 | { | 748 | 45 | #if TOML_INT_CHARCONV | 749 | 45 | const auto result = std::to_chars(write_pos, buf_end, arg); | 750 | 45 | write_pos = result.ptr; | 751 | | #else | 752 | | std::ostringstream ss; | 753 | | ss.imbue(std::locale::classic()); | 754 | | using cast_type = std::conditional_t<std::is_signed_v<arg_type>, int64_t, uint64_t>; | 755 | | ss << static_cast<cast_type>(arg); | 756 | | concatenate(write_pos, buf_end, to_sv(std::move(ss).str())); | 757 | | #endif | 758 | | } | 759 | | | 760 | | // escaped_codepoint | 761 | | else if constexpr (std::is_same_v<arg_type, escaped_codepoint>) | 762 | | { | 763 | | if (arg.cp.value <= U'\x7F') | 764 | | concatenate(write_pos, buf_end, to_sv(arg.cp)); | 765 | | else | 766 | 
| { | 767 | | auto val = static_cast<uint_least32_t>(arg.cp.value); | 768 | | const auto digits = val > 0xFFFFu ? 8u : 4u; | 769 | | constexpr auto mask = uint_least32_t{ 0xFu }; | 770 | | char buf[10] = { '\\', digits > 4 ? 'U' : 'u' }; | 771 | | for (auto i = 2u + digits; i-- > 2u;) | 772 | | { | 773 | | const auto hexdig = val & mask; | 774 | | buf[i] = static_cast<char>(hexdig >= 0xAu ? ('A' + (hexdig - 0xAu)) : ('0' + hexdig)); | 775 | | val >>= 4; | 776 | | } | 777 | | concatenate(write_pos, buf_end, std::string_view{ buf, digits + 2u }); | 778 | | } | 779 | | } | 780 | | | 781 | | // all other floats (fallback - coerce to double) | 782 | | else if constexpr (std::is_floating_point_v<arg_type>) | 783 | | concatenate(write_pos, buf_end, static_cast<double>(arg)); | 784 | | | 785 | | // all other integers (fallback - coerce to (u)int64_t) | 786 | | else if constexpr (std::is_arithmetic_v<arg_type> && std::is_integral_v<arg_type>) | 787 | | { | 788 | | using cast_type = std::conditional_t<std::is_unsigned_v<arg_type>, uint64_t, int64_t>; | 789 | | concatenate(write_pos, buf_end, static_cast<cast_type>(arg)); | 790 | | } | 791 | | | 792 | | else | 793 | | { | 794 | | static_assert( | 795 | | impl::always_false<T>, | 796 | | "concatenate() inputs are limited to std::string_views, integers, floats, and escaped_codepoint"); | 797 | | } | 798 | 45 | } |
void toml::v3::impl::concatenate<unsigned int>(char*&, char*, unsigned int const&) Line | Count | Source | 715 | 28 | { | 716 | 28 | if TOML_UNLIKELY(write_pos >= buf_end) | 717 | 0 | return; | 718 | | | 719 | 28 | using arg_type = impl::remove_cvref<T>; | 720 | | | 721 | | // string views | 722 | | if constexpr (std::is_same_v<arg_type, std::string_view>) | 723 | | { | 724 | | const auto max_chars = static_cast<size_t>(buf_end - write_pos); | 725 | | const auto len = max_chars < arg.length() ? max_chars : arg.length(); | 726 | | std::memcpy(write_pos, arg.data(), len); | 727 | | write_pos += len; | 728 | | } | 729 | | | 730 | | // doubles | 731 | | else if constexpr (std::is_same_v<arg_type, double>) | 732 | | { | 733 | | #if TOML_FLOAT_CHARCONV | 734 | | const auto result = std::to_chars(write_pos, buf_end, arg); | 735 | | write_pos = result.ptr; | 736 | | #else | 737 | | std::ostringstream ss; | 738 | | ss.imbue(std::locale::classic()); | 739 | | ss.precision(std::numeric_limits<arg_type>::max_digits10); | 740 | | ss << arg; | 741 | | concatenate(write_pos, buf_end, to_sv(std::move(ss).str())); | 742 | | #endif | 743 | | } | 744 | | | 745 | | // 64-bit integers | 746 | | else if constexpr (impl::is_one_of<arg_type, int64_t, uint64_t>) | 747 | | { | 748 | | #if TOML_INT_CHARCONV | 749 | | const auto result = std::to_chars(write_pos, buf_end, arg); | 750 | | write_pos = result.ptr; | 751 | | #else | 752 | | std::ostringstream ss; | 753 | | ss.imbue(std::locale::classic()); | 754 | | using cast_type = std::conditional_t<std::is_signed_v<arg_type>, int64_t, uint64_t>; | 755 | | ss << static_cast<cast_type>(arg); | 756 | | concatenate(write_pos, buf_end, to_sv(std::move(ss).str())); | 757 | | #endif | 758 | | } | 759 | | | 760 | | // escaped_codepoint | 761 | | else if constexpr (std::is_same_v<arg_type, escaped_codepoint>) | 762 | | { | 763 | | if (arg.cp.value <= U'\x7F') | 764 | | concatenate(write_pos, buf_end, to_sv(arg.cp)); | 765 | | else | 766 | | { | 767 | | 
auto val = static_cast<uint_least32_t>(arg.cp.value); | 768 | | const auto digits = val > 0xFFFFu ? 8u : 4u; | 769 | | constexpr auto mask = uint_least32_t{ 0xFu }; | 770 | | char buf[10] = { '\\', digits > 4 ? 'U' : 'u' }; | 771 | | for (auto i = 2u + digits; i-- > 2u;) | 772 | | { | 773 | | const auto hexdig = val & mask; | 774 | | buf[i] = static_cast<char>(hexdig >= 0xAu ? ('A' + (hexdig - 0xAu)) : ('0' + hexdig)); | 775 | | val >>= 4; | 776 | | } | 777 | | concatenate(write_pos, buf_end, std::string_view{ buf, digits + 2u }); | 778 | | } | 779 | | } | 780 | | | 781 | | // all other floats (fallback - coerce to double) | 782 | | else if constexpr (std::is_floating_point_v<arg_type>) | 783 | | concatenate(write_pos, buf_end, static_cast<double>(arg)); | 784 | | | 785 | | // all other integers (fallback - coerce to (u)int64_t) | 786 | | else if constexpr (std::is_arithmetic_v<arg_type> && std::is_integral_v<arg_type>) | 787 | 28 | { | 788 | 28 | using cast_type = std::conditional_t<std::is_unsigned_v<arg_type>, uint64_t, int64_t>; | 789 | 28 | concatenate(write_pos, buf_end, static_cast<cast_type>(arg)); | 790 | | } | 791 | | | 792 | | else | 793 | | { | 794 | | static_assert( | 795 | | impl::always_false<T>, | 796 | | "concatenate() inputs are limited to std::string_views, integers, floats, and escaped_codepoint"); | 797 | | } | 798 | 28 | } |
void toml::v3::impl::concatenate<int>(char*&, char*, int const&) Line | Count | Source | 715 | 7 | { | 716 | 7 | if TOML_UNLIKELY(write_pos >= buf_end) | 717 | 0 | return; | 718 | | | 719 | 7 | using arg_type = impl::remove_cvref<T>; | 720 | | | 721 | | // string views | 722 | | if constexpr (std::is_same_v<arg_type, std::string_view>) | 723 | | { | 724 | | const auto max_chars = static_cast<size_t>(buf_end - write_pos); | 725 | | const auto len = max_chars < arg.length() ? max_chars : arg.length(); | 726 | | std::memcpy(write_pos, arg.data(), len); | 727 | | write_pos += len; | 728 | | } | 729 | | | 730 | | // doubles | 731 | | else if constexpr (std::is_same_v<arg_type, double>) | 732 | | { | 733 | | #if TOML_FLOAT_CHARCONV | 734 | | const auto result = std::to_chars(write_pos, buf_end, arg); | 735 | | write_pos = result.ptr; | 736 | | #else | 737 | | std::ostringstream ss; | 738 | | ss.imbue(std::locale::classic()); | 739 | | ss.precision(std::numeric_limits<arg_type>::max_digits10); | 740 | | ss << arg; | 741 | | concatenate(write_pos, buf_end, to_sv(std::move(ss).str())); | 742 | | #endif | 743 | | } | 744 | | | 745 | | // 64-bit integers | 746 | | else if constexpr (impl::is_one_of<arg_type, int64_t, uint64_t>) | 747 | | { | 748 | | #if TOML_INT_CHARCONV | 749 | | const auto result = std::to_chars(write_pos, buf_end, arg); | 750 | | write_pos = result.ptr; | 751 | | #else | 752 | | std::ostringstream ss; | 753 | | ss.imbue(std::locale::classic()); | 754 | | using cast_type = std::conditional_t<std::is_signed_v<arg_type>, int64_t, uint64_t>; | 755 | | ss << static_cast<cast_type>(arg); | 756 | | concatenate(write_pos, buf_end, to_sv(std::move(ss).str())); | 757 | | #endif | 758 | | } | 759 | | | 760 | | // escaped_codepoint | 761 | | else if constexpr (std::is_same_v<arg_type, escaped_codepoint>) | 762 | | { | 763 | | if (arg.cp.value <= U'\x7F') | 764 | | concatenate(write_pos, buf_end, to_sv(arg.cp)); | 765 | | else | 766 | | { | 767 | | auto val = 
static_cast<uint_least32_t>(arg.cp.value); | 768 | | const auto digits = val > 0xFFFFu ? 8u : 4u; | 769 | | constexpr auto mask = uint_least32_t{ 0xFu }; | 770 | | char buf[10] = { '\\', digits > 4 ? 'U' : 'u' }; | 771 | | for (auto i = 2u + digits; i-- > 2u;) | 772 | | { | 773 | | const auto hexdig = val & mask; | 774 | | buf[i] = static_cast<char>(hexdig >= 0xAu ? ('A' + (hexdig - 0xAu)) : ('0' + hexdig)); | 775 | | val >>= 4; | 776 | | } | 777 | | concatenate(write_pos, buf_end, std::string_view{ buf, digits + 2u }); | 778 | | } | 779 | | } | 780 | | | 781 | | // all other floats (fallback - coerce to double) | 782 | | else if constexpr (std::is_floating_point_v<arg_type>) | 783 | | concatenate(write_pos, buf_end, static_cast<double>(arg)); | 784 | | | 785 | | // all other integers (fallback - coerce to (u)int64_t) | 786 | | else if constexpr (std::is_arithmetic_v<arg_type> && std::is_integral_v<arg_type>) | 787 | 7 | { | 788 | 7 | using cast_type = std::conditional_t<std::is_unsigned_v<arg_type>, uint64_t, int64_t>; | 789 | 7 | concatenate(write_pos, buf_end, static_cast<cast_type>(arg)); | 790 | | } | 791 | | | 792 | | else | 793 | | { | 794 | | static_assert( | 795 | | impl::always_false<T>, | 796 | | "concatenate() inputs are limited to std::string_views, integers, floats, and escaped_codepoint"); | 797 | | } | 798 | 7 | } |
void toml::v3::impl::concatenate<long>(char*&, char*, long const&) Line | Count | Source | 715 | 7 | { | 716 | 7 | if TOML_UNLIKELY(write_pos >= buf_end) | 717 | 0 | return; | 718 | | | 719 | 7 | using arg_type = impl::remove_cvref<T>; | 720 | | | 721 | | // string views | 722 | | if constexpr (std::is_same_v<arg_type, std::string_view>) | 723 | | { | 724 | | const auto max_chars = static_cast<size_t>(buf_end - write_pos); | 725 | | const auto len = max_chars < arg.length() ? max_chars : arg.length(); | 726 | | std::memcpy(write_pos, arg.data(), len); | 727 | | write_pos += len; | 728 | | } | 729 | | | 730 | | // doubles | 731 | | else if constexpr (std::is_same_v<arg_type, double>) | 732 | | { | 733 | | #if TOML_FLOAT_CHARCONV | 734 | | const auto result = std::to_chars(write_pos, buf_end, arg); | 735 | | write_pos = result.ptr; | 736 | | #else | 737 | | std::ostringstream ss; | 738 | | ss.imbue(std::locale::classic()); | 739 | | ss.precision(std::numeric_limits<arg_type>::max_digits10); | 740 | | ss << arg; | 741 | | concatenate(write_pos, buf_end, to_sv(std::move(ss).str())); | 742 | | #endif | 743 | | } | 744 | | | 745 | | // 64-bit integers | 746 | | else if constexpr (impl::is_one_of<arg_type, int64_t, uint64_t>) | 747 | 7 | { | 748 | 7 | #if TOML_INT_CHARCONV | 749 | 7 | const auto result = std::to_chars(write_pos, buf_end, arg); | 750 | 7 | write_pos = result.ptr; | 751 | | #else | 752 | | std::ostringstream ss; | 753 | | ss.imbue(std::locale::classic()); | 754 | | using cast_type = std::conditional_t<std::is_signed_v<arg_type>, int64_t, uint64_t>; | 755 | | ss << static_cast<cast_type>(arg); | 756 | | concatenate(write_pos, buf_end, to_sv(std::move(ss).str())); | 757 | | #endif | 758 | | } | 759 | | | 760 | | // escaped_codepoint | 761 | | else if constexpr (std::is_same_v<arg_type, escaped_codepoint>) | 762 | | { | 763 | | if (arg.cp.value <= U'\x7F') | 764 | | concatenate(write_pos, buf_end, to_sv(arg.cp)); | 765 | | else | 766 | | { | 767 | | auto val = 
static_cast<uint_least32_t>(arg.cp.value); | 768 | | const auto digits = val > 0xFFFFu ? 8u : 4u; | 769 | | constexpr auto mask = uint_least32_t{ 0xFu }; | 770 | | char buf[10] = { '\\', digits > 4 ? 'U' : 'u' }; | 771 | | for (auto i = 2u + digits; i-- > 2u;) | 772 | | { | 773 | | const auto hexdig = val & mask; | 774 | | buf[i] = static_cast<char>(hexdig >= 0xAu ? ('A' + (hexdig - 0xAu)) : ('0' + hexdig)); | 775 | | val >>= 4; | 776 | | } | 777 | | concatenate(write_pos, buf_end, std::string_view{ buf, digits + 2u }); | 778 | | } | 779 | | } | 780 | | | 781 | | // all other floats (fallback - coerce to double) | 782 | | else if constexpr (std::is_floating_point_v<arg_type>) | 783 | | concatenate(write_pos, buf_end, static_cast<double>(arg)); | 784 | | | 785 | | // all other integers (fallback - coerce to (u)int64_t) | 786 | | else if constexpr (std::is_arithmetic_v<arg_type> && std::is_integral_v<arg_type>) | 787 | | { | 788 | | using cast_type = std::conditional_t<std::is_unsigned_v<arg_type>, uint64_t, int64_t>; | 789 | | concatenate(write_pos, buf_end, static_cast<cast_type>(arg)); | 790 | | } | 791 | | | 792 | | else | 793 | | { | 794 | | static_assert( | 795 | | impl::always_false<T>, | 796 | | "concatenate() inputs are limited to std::string_views, integers, floats, and escaped_codepoint"); | 797 | | } | 798 | 7 | } |
|
799 | | |
800 | | struct error_builder |
801 | | { |
802 | | static constexpr std::size_t buf_size = 512; |
803 | | char buf[buf_size]; |
804 | | char* write_pos = buf; |
805 | | char* const max_write_pos = buf + (buf_size - std::size_t{ 1 }); // allow for null terminator |
806 | | |
807 | | TOML_NODISCARD_CTOR |
808 | | error_builder(std::string_view scope) noexcept |
809 | 2.76k | { |
810 | 2.76k | concatenate(write_pos, max_write_pos, "Error while parsing "sv); |
811 | 2.76k | concatenate(write_pos, max_write_pos, scope); |
812 | 2.76k | concatenate(write_pos, max_write_pos, ": "sv); |
813 | 2.76k | } |
814 | | |
815 | | template <typename T> |
816 | | void append(const T& arg) noexcept |
817 | 6.14k | { |
818 | 6.14k | concatenate(write_pos, max_write_pos, arg); |
819 | 6.14k | } void toml::v3::impl::error_builder::append<std::__1::basic_string_view<char, std::__1::char_traits<char> > >(std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) Line | Count | Source | 817 | 6.01k | { | 818 | 6.01k | concatenate(write_pos, max_write_pos, arg); | 819 | 6.01k | } |
void toml::v3::impl::error_builder::append<toml::v3::impl::escaped_codepoint>(toml::v3::impl::escaped_codepoint const&) Line | Count | Source | 817 | 79 | { | 818 | 79 | concatenate(write_pos, max_write_pos, arg); | 819 | 79 | } |
void toml::v3::impl::error_builder::append<unsigned long>(unsigned long const&) Line | Count | Source | 817 | 17 | { | 818 | 17 | concatenate(write_pos, max_write_pos, arg); | 819 | 17 | } |
void toml::v3::impl::error_builder::append<unsigned int>(unsigned int const&) Line | Count | Source | 817 | 28 | { | 818 | 28 | concatenate(write_pos, max_write_pos, arg); | 819 | 28 | } |
void toml::v3::impl::error_builder::append<int>(int const&) Line | Count | Source | 817 | 7 | { | 818 | 7 | concatenate(write_pos, max_write_pos, arg); | 819 | 7 | } |
|
820 | | |
821 | | TOML_RETURNS_BY_THROWING |
822 | | auto finish(const source_position& pos, const source_path_ptr& source_path) const |
823 | 2.76k | { |
824 | 2.76k | *write_pos = '\0'; |
825 | | |
826 | 2.76k | #if TOML_EXCEPTIONS |
827 | 2.76k | throw parse_error{ buf, pos, source_path }; |
828 | | #else |
829 | | return parse_error{ std::string(buf, static_cast<size_t>(write_pos - buf)), pos, source_path }; |
830 | | #endif |
831 | 2.76k | } |
832 | | |
833 | | TOML_DELETE_DEFAULTS(error_builder); |
834 | | }; |
835 | | |
836 | | struct parse_scope |
837 | | { |
838 | | std::string_view& storage_; |
839 | | std::string_view parent_; |
840 | | |
841 | | TOML_NODISCARD_CTOR |
842 | | explicit parse_scope(std::string_view& current_scope, std::string_view new_scope) noexcept |
843 | 1.22M | : storage_{ current_scope }, |
844 | 1.22M | parent_{ current_scope } |
845 | 1.22M | { |
846 | 1.22M | storage_ = new_scope; |
847 | 1.22M | } |
848 | | |
849 | | ~parse_scope() noexcept |
850 | 1.22M | { |
851 | 1.22M | storage_ = parent_; |
852 | 1.22M | } |
853 | | |
854 | | TOML_DELETE_DEFAULTS(parse_scope); |
855 | | }; |
856 | 1.22M | #define push_parse_scope_2(scope, line) parse_scope ps_##line(current_scope, scope) |
857 | 1.22M | #define push_parse_scope_1(scope, line) push_parse_scope_2(scope, line) |
858 | 1.22M | #define push_parse_scope(scope) push_parse_scope_1(scope, __LINE__) |
859 | | |
860 | | struct parse_key_buffer |
861 | | { |
862 | | std::string buffer; |
863 | | std::vector<std::pair<size_t, size_t>> segments; |
864 | | std::vector<source_position> starts; |
865 | | std::vector<source_position> ends; |
866 | | |
867 | | void clear() noexcept |
868 | 103k | { |
869 | 103k | buffer.clear(); |
870 | 103k | segments.clear(); |
871 | 103k | starts.clear(); |
872 | 103k | ends.clear(); |
873 | 103k | } |
874 | | |
875 | | void push_back(std::string_view segment, source_position b, source_position e) |
876 | 696k | { |
877 | 696k | segments.push_back({ buffer.length(), segment.length() }); |
878 | 696k | buffer.append(segment); |
879 | 696k | starts.push_back(b); |
880 | 696k | ends.push_back(e); |
881 | 696k | } |
882 | | |
883 | | TOML_PURE_INLINE_GETTER |
884 | | std::string_view operator[](size_t i) const noexcept |
885 | 1.28M | { |
886 | 1.28M | return std::string_view{ buffer.c_str() + segments[i].first, segments[i].second }; |
887 | 1.28M | } |
888 | | |
889 | | TOML_PURE_INLINE_GETTER |
890 | | std::string_view back() const noexcept |
891 | 102k | { |
892 | 102k | return (*this)[segments.size() - 1u]; |
893 | 102k | } |
894 | | |
895 | | TOML_PURE_INLINE_GETTER |
896 | | bool empty() const noexcept |
897 | 12 | { |
898 | 12 | return segments.empty(); |
899 | 12 | } |
900 | | |
901 | | TOML_PURE_INLINE_GETTER |
902 | | size_t size() const noexcept |
903 | 1.35M | { |
904 | 1.35M | return segments.size(); |
905 | 1.35M | } |
906 | | }; |
907 | | |
908 | | struct depth_counter_scope |
909 | | { |
910 | | size_t& depth_; |
911 | | |
912 | | TOML_NODISCARD_CTOR |
913 | | explicit depth_counter_scope(size_t& depth) noexcept // |
914 | 876k | : depth_{ depth } |
915 | 876k | { |
916 | 876k | depth_++; |
917 | 876k | } |
918 | | |
919 | | ~depth_counter_scope() noexcept |
920 | 876k | { |
921 | 876k | depth_--; |
922 | 876k | } |
923 | | |
924 | | TOML_DELETE_DEFAULTS(depth_counter_scope); |
925 | | }; |
926 | | |
927 | | struct parsed_string // a string value paired with a flag describing the literal it came from
928 | | {
929 | | std::string_view value; // non-owning view of the string's characters
930 | | bool was_multi_line; // NOTE(review): presumably true when the literal used multi-line syntax - confirm at the point where this is filled in
931 | | };
932 | | |
933 | | struct table_vector_scope |
934 | | { |
935 | | std::vector<table*>& tables; |
936 | | |
937 | | TOML_NODISCARD_CTOR |
938 | | explicit table_vector_scope(std::vector<table*>& tables_, table& tbl) // |
939 | 7.54k | : tables{ tables_ } |
940 | 7.54k | { |
941 | 7.54k | tables.push_back(&tbl); |
942 | 7.54k | } |
943 | | |
944 | | ~table_vector_scope() noexcept |
945 | 7.54k | { |
946 | 7.54k | tables.pop_back(); |
947 | 7.54k | } |
948 | | |
949 | | TOML_DELETE_DEFAULTS(table_vector_scope); |
950 | | }; |
951 | | } |
952 | | TOML_ANON_NAMESPACE_END; |
953 | | |
954 | | #if 1 // parser helper macros |
955 | | |
956 | | // Q: "what the fuck is this? MACROS????" |
957 | | // A: The parser needs to work in exceptionless mode (returning error objects directly) |
958 | | // and exception mode (reporting parse failures by throwing). Two totally different control flows. |
959 | | // These macros encapsulate the differences between the two modes so I can write code
960 | | // as though I was only targeting one mode and not want to yeet myself into the sun.
961 | | // They're all #undef'd at the bottom of the parser's implementation so they should be harmless outside |
962 | | // of toml++. |
963 | | |
// End-of-file is represented by a null `cp`. The expansion is parenthesized so that it always acts as a
// single operand, whatever expression it gets pasted into.
#define is_eof() (!cp)
// Asserts (via TOML_ASSERT_ASSUME) that `cp` is non-null.
#define assert_not_eof() TOML_ASSERT_ASSUME(cp != nullptr)
// Returns `__VA_ARGS__` (possibly nothing) from the enclosing function when at end-of-file.
#define return_if_eof(...) \
    do \
    { \
        if TOML_UNLIKELY(is_eof()) \
            return __VA_ARGS__; \
    } \
    while (false)

#if TOML_EXCEPTIONS
// Exception mode: failures are reported by throwing, so there is no stored error to test for and the
// error checks below collapse into no-ops.
#define is_error() false
#define return_after_error(...) TOML_UNREACHABLE
#define assert_not_error() static_assert(true)
#define return_if_error(...) static_assert(true)
#define return_if_error_or_eof(...) return_if_eof(__VA_ARGS__)
#else
// Exceptionless mode: a failure is stored in `err`, and the checks below test it and return early.
// Parenthesized for the same reason as is_eof().
#define is_error() (!!err)
// Leaves the enclosing function (with `__VA_ARGS__` as the result) once an error has been set.
#define return_after_error(...) return __VA_ARGS__
#define assert_not_error() TOML_ASSERT(!is_error())
// Returns `__VA_ARGS__` from the enclosing function if an error is already stored.
#define return_if_error(...) \
    do \
    { \
        if TOML_UNLIKELY(is_error()) \
            return __VA_ARGS__; \
    } \
    while (false)
// Returns `__VA_ARGS__` from the enclosing function at end-of-file or if an error is already stored.
#define return_if_error_or_eof(...) \
    do \
    { \
        if TOML_UNLIKELY(is_eof() || is_error()) \
            return __VA_ARGS__; \
    } \
    while (false)
#endif

// parse_error_break(): optional debugger trap invoked when a parse error is raised. It is only active
// when TOML_BREAK_AT_PARSE_ERRORS is defined to a non-zero value; otherwise it is a harmless no-op.
#if defined(TOML_BREAK_AT_PARSE_ERRORS) && TOML_BREAK_AT_PARSE_ERRORS
#if defined(__has_builtin)
#if __has_builtin(__builtin_debugtrap)
#define parse_error_break() __builtin_debugtrap()
#elif __has_builtin(__debugbreak)
#define parse_error_break() __debugbreak()
#endif
#endif
// no suitable builtin was detected above - fall back to a compiler-specific intrinsic or an assertion
#ifndef parse_error_break
#if TOML_MSVC || TOML_ICC
#define parse_error_break() __debugbreak()
#else
#define parse_error_break() TOML_ASSERT(false)
#endif
#endif
#else
#define parse_error_break() static_assert(true)
#endif

// Reports an error built from `__VA_ARGS__` (unless one is already set), then leaves the enclosing
// function with `ret` as the result.
#define set_error_and_return(ret, ...) \
    do \
    { \
        if (!is_error()) \
            set_error(__VA_ARGS__); \
        return_after_error(ret); \
    } \
    while (false)

// As set_error_and_return(), with a value-initialized (`{}`) result.
#define set_error_and_return_default(...) set_error_and_return({}, __VA_ARGS__)

// At end-of-file: reports "encountered end-of-file" and leaves the enclosing function with `__VA_ARGS__`
// as the result. Does nothing otherwise.
#define set_error_and_return_if_eof(...) \
    do \
    { \
        if TOML_UNLIKELY(is_eof()) \
            set_error_and_return(__VA_ARGS__, "encountered end-of-file"sv); \
    } \
    while (false)

// Steps to the next codepoint (must not already be at end-of-file), returning `__VA_ARGS__` from the
// enclosing function if that left an error set.
#define advance_and_return_if_error(...) \
    do \
    { \
        assert_not_eof(); \
        advance(); \
        return_if_error(__VA_ARGS__); \
    } \
    while (false)

// As advance_and_return_if_error(), but running into end-of-file after the step is itself reported as
// an error.
#define advance_and_return_if_error_or_eof(...) \
    do \
    { \
        assert_not_eof(); \
        advance(); \
        return_if_error(__VA_ARGS__); \
        set_error_and_return_if_eof(__VA_ARGS__); \
    } \
    while (false)
1056 | | |
1057 | | #endif // parser helper macros |
1058 | | |
1059 | | TOML_IMPL_NAMESPACE_START |
1060 | | { |
1061 | | TOML_ABI_NAMESPACE_BOOL(TOML_EXCEPTIONS, impl_ex, impl_noex); |
1062 | | |
1063 | | class parser |
1064 | | { |
1065 | | private: |
1066 | | static constexpr size_t max_nested_values = TOML_MAX_NESTED_VALUES; |
1067 | | static constexpr size_t max_dotted_keys_depth = TOML_MAX_DOTTED_KEYS_DEPTH; |
1068 | | |
1069 | | utf8_buffered_reader reader; |
1070 | | table root; |
1071 | | source_position prev_pos = { 1, 1 }; |
1072 | | const utf8_codepoint* cp = {}; |
1073 | | std::vector<table*> implicit_tables; |
1074 | | std::vector<table*> dotted_key_tables; |
1075 | | std::vector<table*> open_inline_tables; |
1076 | | std::vector<array*> table_arrays; |
1077 | | parse_key_buffer key_buffer; |
1078 | | std::string string_buffer; |
1079 | | std::string recording_buffer; // for diagnostics |
1080 | | bool recording = false, recording_whitespace = true; |
1081 | | std::string_view current_scope; |
1082 | | size_t nested_values = {}; |
1083 | | #if !TOML_EXCEPTIONS |
1084 | | mutable optional<parse_error> err; |
1085 | | #endif |
1086 | | |
1087 | | TOML_NODISCARD |
1088 | | source_position current_position(source_index fallback_offset = 0) const noexcept |
1089 | 2.33M | { |
1090 | 2.33M | if (!is_eof()) |
1091 | 2.33M | return cp->position; |
1092 | 7.65k | return { prev_pos.line, static_cast<source_index>(prev_pos.column + fallback_offset) }; |
1093 | 2.33M | } |
1094 | | |
1095 | | template <typename... T> |
1096 | | TOML_RETURNS_BY_THROWING |
1097 | | TOML_NEVER_INLINE |
1098 | | void set_error_at(source_position pos, const T&... reason) const |
1099 | 2.76k | { |
1100 | 2.76k | static_assert(sizeof...(T) > 0); |
1101 | 2.76k | return_if_error(); |
1102 | | |
1103 | 2.76k | error_builder builder{ current_scope }; |
1104 | 2.76k | (builder.append(reason), ...); |
1105 | | |
1106 | 2.76k | parse_error_break(); |
1107 | | |
1108 | 2.76k | #if TOML_EXCEPTIONS |
1109 | 2.76k | builder.finish(pos, reader.source_path()); |
1110 | | #else |
1111 | | err.emplace(builder.finish(pos, reader.source_path())); |
1112 | | #endif |
1113 | 2.76k | } void toml::v3::impl::impl_ex::parser::set_error_at<std::__1::basic_string_view<char, std::__1::char_traits<char> >, toml::v3::impl::escaped_codepoint, std::__1::basic_string_view<char, std::__1::char_traits<char> > >(toml::v3::source_position, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, toml::v3::impl::escaped_codepoint const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const Line | Count | Source | 1099 | 79 | { | 1100 | 79 | static_assert(sizeof...(T) > 0); | 1101 | 79 | return_if_error(); | 1102 | | | 1103 | 79 | error_builder builder{ current_scope }; | 1104 | 79 | (builder.append(reason), ...); | 1105 | | | 1106 | 79 | parse_error_break(); | 1107 | | | 1108 | 79 | #if TOML_EXCEPTIONS | 1109 | 79 | builder.finish(pos, reader.source_path()); | 1110 | | #else | 1111 | | err.emplace(builder.finish(pos, reader.source_path())); | 1112 | | #endif | 1113 | 79 | } |
void toml::v3::impl::impl_ex::parser::set_error_at<std::__1::basic_string_view<char, std::__1::char_traits<char> > >(toml::v3::source_position, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const Line | Count | Source | 1099 | 1.19k | { | 1100 | 1.19k | static_assert(sizeof...(T) > 0); | 1101 | 1.19k | return_if_error(); | 1102 | | | 1103 | 1.19k | error_builder builder{ current_scope }; | 1104 | 1.19k | (builder.append(reason), ...); | 1105 | | | 1106 | 1.19k | parse_error_break(); | 1107 | | | 1108 | 1.19k | #if TOML_EXCEPTIONS | 1109 | 1.19k | builder.finish(pos, reader.source_path()); | 1110 | | #else | 1111 | | err.emplace(builder.finish(pos, reader.source_path())); | 1112 | | #endif | 1113 | 1.19k | } |
void toml::v3::impl::impl_ex::parser::set_error_at<std::__1::basic_string_view<char, std::__1::char_traits<char> >, unsigned long, std::__1::basic_string_view<char, std::__1::char_traits<char> > >(toml::v3::source_position, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, unsigned long const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const Line | Count | Source | 1099 | 13 | { | 1100 | 13 | static_assert(sizeof...(T) > 0); | 1101 | 13 | return_if_error(); | 1102 | | | 1103 | 13 | error_builder builder{ current_scope }; | 1104 | 13 | (builder.append(reason), ...); | 1105 | | | 1106 | 13 | parse_error_break(); | 1107 | | | 1108 | 13 | #if TOML_EXCEPTIONS | 1109 | 13 | builder.finish(pos, reader.source_path()); | 1110 | | #else | 1111 | | err.emplace(builder.finish(pos, reader.source_path())); | 1112 | | #endif | 1113 | 13 | } |
void toml::v3::impl::impl_ex::parser::set_error_at<std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> > >(toml::v3::source_position, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const Line | Count | Source | 1099 | 1.30k | { | 1100 | 1.30k | static_assert(sizeof...(T) > 0); | 1101 | 1.30k | return_if_error(); | 1102 | | | 1103 | 1.30k | error_builder builder{ current_scope }; | 1104 | 1.30k | (builder.append(reason), ...); | 1105 | | | 1106 | 1.30k | parse_error_break(); | 1107 | | | 1108 | 1.30k | #if TOML_EXCEPTIONS | 1109 | 1.30k | builder.finish(pos, reader.source_path()); | 1110 | | #else | 1111 | | err.emplace(builder.finish(pos, reader.source_path())); | 1112 | | #endif | 1113 | 1.30k | } |
void toml::v3::impl::impl_ex::parser::set_error_at<std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> > >(toml::v3::source_position, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const Line | Count | Source | 1099 | 26 | { | 1100 | 26 | static_assert(sizeof...(T) > 0); | 1101 | 26 | return_if_error(); | 1102 | | | 1103 | 26 | error_builder builder{ current_scope }; | 1104 | 26 | (builder.append(reason), ...); | 1105 | | | 1106 | 26 | parse_error_break(); | 1107 | | | 1108 | 26 | #if TOML_EXCEPTIONS | 1109 | 26 | builder.finish(pos, reader.source_path()); | 1110 | | #else | 1111 | | err.emplace(builder.finish(pos, reader.source_path())); | 1112 | | #endif | 1113 | 26 | } |
void toml::v3::impl::impl_ex::parser::set_error_at<std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> > >(toml::v3::source_position, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const Line | Count | Source | 1099 | 60 | { | 1100 | 60 | static_assert(sizeof...(T) > 0); | 1101 | 60 | return_if_error(); | 1102 | | | 1103 | 60 | error_builder builder{ current_scope }; | 1104 | 60 | (builder.append(reason), ...); | 1105 | | | 1106 | 60 | parse_error_break(); | 1107 | | | 1108 | 60 | #if TOML_EXCEPTIONS | 1109 | 60 | builder.finish(pos, reader.source_path()); | 1110 | | #else | 1111 | | err.emplace(builder.finish(pos, reader.source_path())); | 1112 | | #endif | 1113 | 60 | } |
void toml::v3::impl::impl_ex::parser::set_error_at<std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> > >(toml::v3::source_position, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const Line | Count | Source | 1099 | 55 | { | 1100 | 55 | static_assert(sizeof...(T) > 0); | 1101 | 55 | return_if_error(); | 1102 | | | 1103 | 55 | error_builder builder{ current_scope }; | 1104 | 55 | (builder.append(reason), ...); | 1105 | | | 1106 | 55 | parse_error_break(); | 1107 | | | 1108 | 55 | #if TOML_EXCEPTIONS | 1109 | 55 | builder.finish(pos, reader.source_path()); | 1110 | | #else | 1111 | | err.emplace(builder.finish(pos, reader.source_path())); | 1112 | | #endif | 1113 | 55 | } |
void toml::v3::impl::impl_ex::parser::set_error_at<std::__1::basic_string_view<char, std::__1::char_traits<char> >, unsigned long, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> > >(toml::v3::source_position, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, unsigned long const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const Line | Count | Source | 1099 | 2 | { | 1100 | 2 | static_assert(sizeof...(T) > 0); | 1101 | 2 | return_if_error(); | 1102 | | | 1103 | 2 | error_builder builder{ current_scope }; | 1104 | 2 | (builder.append(reason), ...); | 1105 | | | 1106 | 2 | parse_error_break(); | 1107 | | | 1108 | 2 | #if TOML_EXCEPTIONS | 1109 | 2 | builder.finish(pos, reader.source_path()); | 1110 | | #else | 1111 | | err.emplace(builder.finish(pos, reader.source_path())); | 1112 | | #endif | 1113 | 2 | } |
void toml::v3::impl::impl_ex::parser::set_error_at<std::__1::basic_string_view<char, std::__1::char_traits<char> >, unsigned int>(toml::v3::source_position, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, unsigned int const&) const Line | Count | Source | 1099 | 16 | { | 1100 | 16 | static_assert(sizeof...(T) > 0); | 1101 | 16 | return_if_error(); | 1102 | | | 1103 | 16 | error_builder builder{ current_scope }; | 1104 | 16 | (builder.append(reason), ...); | 1105 | | | 1106 | 16 | parse_error_break(); | 1107 | | | 1108 | 16 | #if TOML_EXCEPTIONS | 1109 | 16 | builder.finish(pos, reader.source_path()); | 1110 | | #else | 1111 | | err.emplace(builder.finish(pos, reader.source_path())); | 1112 | | #endif | 1113 | 16 | } |
void toml::v3::impl::impl_ex::parser::set_error_at<std::__1::basic_string_view<char, std::__1::char_traits<char> >, unsigned long>(toml::v3::source_position, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, unsigned long const&) const Line | Count | Source | 1099 | 2 | { | 1100 | 2 | static_assert(sizeof...(T) > 0); | 1101 | 2 | return_if_error(); | 1102 | | | 1103 | 2 | error_builder builder{ current_scope }; | 1104 | 2 | (builder.append(reason), ...); | 1105 | | | 1106 | 2 | parse_error_break(); | 1107 | | | 1108 | 2 | #if TOML_EXCEPTIONS | 1109 | 2 | builder.finish(pos, reader.source_path()); | 1110 | | #else | 1111 | | err.emplace(builder.finish(pos, reader.source_path())); | 1112 | | #endif | 1113 | 2 | } |
void toml::v3::impl::impl_ex::parser::set_error_at<std::__1::basic_string_view<char, std::__1::char_traits<char> >, unsigned int, std::__1::basic_string_view<char, std::__1::char_traits<char> >, unsigned int>(toml::v3::source_position, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, unsigned int const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, unsigned int const&) const Line | Count | Source | 1099 | 6 | { | 1100 | 6 | static_assert(sizeof...(T) > 0); | 1101 | 6 | return_if_error(); | 1102 | | | 1103 | 6 | error_builder builder{ current_scope }; | 1104 | 6 | (builder.append(reason), ...); | 1105 | | | 1106 | 6 | parse_error_break(); | 1107 | | | 1108 | 6 | #if TOML_EXCEPTIONS | 1109 | 6 | builder.finish(pos, reader.source_path()); | 1110 | | #else | 1111 | | err.emplace(builder.finish(pos, reader.source_path())); | 1112 | | #endif | 1113 | 6 | } |
void toml::v3::impl::impl_ex::parser::set_error_at<std::__1::basic_string_view<char, std::__1::char_traits<char> >, int>(toml::v3::source_position, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, int const&) const Line | Count | Source | 1099 | 7 | { | 1100 | 7 | static_assert(sizeof...(T) > 0); | 1101 | 7 | return_if_error(); | 1102 | | | 1103 | 7 | error_builder builder{ current_scope }; | 1104 | 7 | (builder.append(reason), ...); | 1105 | | | 1106 | 7 | parse_error_break(); | 1107 | | | 1108 | 7 | #if TOML_EXCEPTIONS | 1109 | 7 | builder.finish(pos, reader.source_path()); | 1110 | | #else | 1111 | | err.emplace(builder.finish(pos, reader.source_path())); | 1112 | | #endif | 1113 | 7 | } |
|
1114 | | |
1115 | | template <typename... T> |
1116 | | TOML_RETURNS_BY_THROWING |
1117 | | void set_error(const T&... reason) const |
1118 | 2.51k | { |
1119 | 2.51k | set_error_at(current_position(1), reason...); |
1120 | 2.51k | } void toml::v3::impl::impl_ex::parser::set_error<std::__1::basic_string_view<char, std::__1::char_traits<char> >, toml::v3::impl::escaped_codepoint, std::__1::basic_string_view<char, std::__1::char_traits<char> > >(std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, toml::v3::impl::escaped_codepoint const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const Line | Count | Source | 1118 | 79 | { | 1119 | 79 | set_error_at(current_position(1), reason...); | 1120 | 79 | } |
void toml::v3::impl::impl_ex::parser::set_error<std::__1::basic_string_view<char, std::__1::char_traits<char> > >(std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const Line | Count | Source | 1118 | 1.00k | { | 1119 | 1.00k | set_error_at(current_position(1), reason...); | 1120 | 1.00k | } |
void toml::v3::impl::impl_ex::parser::set_error<std::__1::basic_string_view<char, std::__1::char_traits<char> >, unsigned long, std::__1::basic_string_view<char, std::__1::char_traits<char> > >(std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, unsigned long const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const Line | Count | Source | 1118 | 13 | { | 1119 | 13 | set_error_at(current_position(1), reason...); | 1120 | 13 | } |
void toml::v3::impl::impl_ex::parser::set_error<std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> > >(std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const Line | Count | Source | 1118 | 13 | { | 1119 | 13 | set_error_at(current_position(1), reason...); | 1120 | 13 | } |
void toml::v3::impl::impl_ex::parser::set_error<std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> > >(std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const Line | Count | Source | 1118 | 60 | { | 1119 | 60 | set_error_at(current_position(1), reason...); | 1120 | 60 | } |
void toml::v3::impl::impl_ex::parser::set_error<std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> > >(std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const Line | Count | Source | 1118 | 55 | { | 1119 | 55 | set_error_at(current_position(1), reason...); | 1120 | 55 | } |
void toml::v3::impl::impl_ex::parser::set_error<std::__1::basic_string_view<char, std::__1::char_traits<char> >, unsigned long, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> > >(std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, unsigned long const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const Line | Count | Source | 1118 | 2 | { | 1119 | 2 | set_error_at(current_position(1), reason...); | 1120 | 2 | } |
void toml::v3::impl::impl_ex::parser::set_error<std::__1::basic_string_view<char, std::__1::char_traits<char> >, unsigned int>(std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, unsigned int const&) const Line | Count | Source | 1118 | 16 | { | 1119 | 16 | set_error_at(current_position(1), reason...); | 1120 | 16 | } |
void toml::v3::impl::impl_ex::parser::set_error<std::__1::basic_string_view<char, std::__1::char_traits<char> >, unsigned long>(std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, unsigned long const&) const Line | Count | Source | 1118 | 2 | { | 1119 | 2 | set_error_at(current_position(1), reason...); | 1120 | 2 | } |
void toml::v3::impl::impl_ex::parser::set_error<std::__1::basic_string_view<char, std::__1::char_traits<char> >, unsigned int, std::__1::basic_string_view<char, std::__1::char_traits<char> >, unsigned int>(std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, unsigned int const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, unsigned int const&) const Line | Count | Source | 1118 | 6 | { | 1119 | 6 | set_error_at(current_position(1), reason...); | 1120 | 6 | } |
void toml::v3::impl::impl_ex::parser::set_error<std::__1::basic_string_view<char, std::__1::char_traits<char> >, int>(std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, int const&) const Line | Count | Source | 1118 | 7 | { | 1119 | 7 | set_error_at(current_position(1), reason...); | 1120 | 7 | } |
void toml::v3::impl::impl_ex::parser::set_error<std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> > >(std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const Line | Count | Source | 1118 | 1.26k | { | 1119 | 1.26k | set_error_at(current_position(1), reason...); | 1120 | 1.26k | } |
|
1121 | | |
// Steps the parser back by `count` codepoints: `cp` is replaced by whatever
// reader.step_back(count) returns and prev_pos is reset to that codepoint's position.
// `count` must be non-zero (TOML_ASSERT_ASSUME).
// NOTE(review): return_if_error() is a macro defined outside this view; presumably it
// returns immediately when an error has already been recorded - confirm.
1122 | | void go_back(size_t count = 1) noexcept
1123 | 860k | {
1124 | 860k | return_if_error();
1125 | 860k | TOML_ASSERT_ASSUME(count);
1126 | | 
1127 | 860k | cp = reader.step_back(count);
1128 | 860k | prev_pos = cp->position;
1129 | 860k | }
1130 | | |
// Moves the parser forward by one codepoint.
// The position of the current codepoint is saved in prev_pos before `cp` is replaced
// with the result of reader.read_next(). Must not be called at end-of-file
// (assert_not_eof). While recording is active the new codepoint's bytes are appended
// to recording_buffer.
1131 | | void advance()
1132 | 43.0M | {
1133 | 43.0M | return_if_error();
1134 | 43.0M | assert_not_eof();
1135 | | 
1136 | 43.0M | prev_pos = cp->position;
1137 | 43.0M | cp = reader.read_next();
1138 | | 
// Only compiled when exceptions are disabled: a reader error is moved into `err`
// and the function returns before anything is recorded.
1139 | | #if !TOML_EXCEPTIONS
1140 | | if (reader.error())
1141 | | {
1142 | | err = std::move(reader.error());
1143 | | return;
1144 | | }
1145 | | #endif
1146 | | 
// Whitespace codepoints are skipped when recording_whitespace is false.
1147 | 43.0M | if (recording && !is_eof())
1148 | 29.4M | {
1149 | 29.4M | if (recording_whitespace || !is_whitespace(*cp))
1150 | 29.4M | recording_buffer.append(cp->bytes, cp->count);
1151 | 29.4M | }
1152 | 43.0M | }
1153 | | |
// Turns recording on (with whitespace recording enabled) and empties recording_buffer.
// When `include_current` is true and the parser is not at end-of-file, the bytes of
// the current codepoint are used to seed the buffer.
1154 | | void start_recording(bool include_current = true) noexcept
1155 | 106k | {
1156 | 106k | return_if_error();
1157 | | 
1158 | 106k | recording = true;
1159 | 106k | recording_whitespace = true;
1160 | 106k | recording_buffer.clear();
1161 | 106k | if (include_current && !is_eof())
1162 | 106k | recording_buffer.append(cp->bytes, cp->count);
1163 | 106k | }
1164 | | |
// Turns recording off and optionally drops the last `pop_bytes` bytes from
// recording_buffer. The buffer is emptied when `pop_bytes` is at least its length;
// a single byte is removed with pop_back(), larger counts with an erase() of the tail.
1165 | | void stop_recording(size_t pop_bytes = 0) noexcept
1166 | 105k | {
1167 | 105k | return_if_error();
1168 | | 
1169 | 105k | recording = false;
1170 | 105k | if (pop_bytes)
1171 | 102k | {
1172 | 102k | if (pop_bytes >= recording_buffer.length())
1173 | 58 | recording_buffer.clear();
1174 | 102k | else if (pop_bytes == 1u)
1175 | 102k | recording_buffer.pop_back();
1176 | 0 | else
1177 | 0 | recording_buffer.erase(recording_buffer.begin()
1178 | 0 | + static_cast<ptrdiff_t>(recording_buffer.length() - pop_bytes),
1179 | 0 | recording_buffer.end());
1180 | 102k | }
1181 | 105k | }
1182 | | |
// Skips a run of horizontal whitespace starting at the current codepoint.
// Returns true when at least one codepoint was consumed, false otherwise.
// A codepoint that is_horizontal_whitespace() accepts but
// is_ascii_horizontal_whitespace() rejects is reported as an error
// (expected space or tab).
// NOTE(review): the *_return_* macros are defined outside this view; presumably they
// return the value-initialised result (false) on error or end-of-file - confirm.
1183 | | bool consume_leading_whitespace()
1184 | 3.47M | {
1185 | 3.47M | return_if_error_or_eof({});
1186 | | 
1187 | 3.47M | bool consumed = false;
1188 | 3.53M | while (!is_eof() && is_horizontal_whitespace(*cp))
1189 | 57.9k | {
1190 | 57.9k | if TOML_UNLIKELY(!is_ascii_horizontal_whitespace(*cp))
1191 | 57.9k | set_error_and_return_default("expected space or tab, saw '"sv, escaped_codepoint{ *cp }, "'"sv);
1192 | | 
1193 | 57.9k | consumed = true;
1194 | 57.9k | advance_and_return_if_error({});
1195 | 57.9k | }
1196 | 3.47M | return consumed;
1197 | 3.47M | }
1198 | | |
// Consumes one line break (LF or CR LF) at the current position.
// Returns true when a line break was consumed and false when the current codepoint
// is not the start of one. Vertical tab and form-feed are rejected with an error,
// as is a CR that is followed by end-of-file or by anything other than LF.
1199 | | bool consume_line_break()
1200 | 3.55M | {
1201 | 3.55M | return_if_error_or_eof({});
1202 | | 
1203 | 3.55M | if TOML_UNLIKELY(is_match(*cp, U'\v', U'\f'))
1204 | 3.55M | set_error_and_return_default(
1205 | 3.55M | R"(vertical tabs '\v' and form-feeds '\f' are not legal line breaks in TOML)"sv);
1206 | | 
1207 | 3.55M | if (*cp == U'\r')
1208 | 305 | {
1209 | 305 | advance_and_return_if_error({}); // skip \r
1210 | | 
1211 | 305 | if TOML_UNLIKELY(is_eof())
1212 | 305 | set_error_and_return_default("expected '\\n' after '\\r', saw EOF"sv);
1213 | | 
1214 | 302 | if TOML_UNLIKELY(*cp != U'\n')
1215 | 302 | set_error_and_return_default("expected '\\n' after '\\r', saw '"sv,
1216 | 302 | escaped_codepoint{ *cp },
1217 | 302 | "'"sv);
1218 | 302 | }
1219 | 3.55M | else if (*cp != U'\n')
1220 | 1.98M | return false;
1221 | | 
// Both the LF-only and the CR LF paths arrive here with cp on the LF.
1222 | 1.56M | advance_and_return_if_error({}); // skip \n
1223 | 1.56M | return true;
1224 | 3.55M | }
1225 | | |
// Advances until the end of the current line has been consumed.
// On the first ASCII vertical-whitespace codepoint the result of
// consume_line_break() is returned; if end-of-file is reached first the function
// returns true.
// The recorded hit count for this function is 0 in this coverage run.
1226 | | bool consume_rest_of_line()
1227 | 0 | {
1228 | 0 | return_if_error_or_eof({});
1229 | 0 |

1230 | 0 | do
1231 | 0 | {
1232 | 0 | if (is_ascii_vertical_whitespace(*cp))
1233 | 0 | return consume_line_break();
1234 | 0 | else
1235 | 0 | advance();
1236 | 0 | return_if_error({});
1237 | 0 | }
1238 | 0 | while (!is_eof());
1239 | 0 |

1240 | 0 | return true;
1241 | 0 | }
1242 | | |
// Consumes a comment that starts at the current codepoint.
// Returns false when the current codepoint is not a hash sign. Otherwise everything
// up to and including the terminating line break (or up to end-of-file) is consumed
// and true is returned.
// When TOML_LANG_AT_LEAST(1, 0, 0) holds, control characters other than TAB and
// unicode surrogates inside the comment are reported as errors.
1243 | | bool consume_comment()
1244 | 1.86M | {
1245 | 1.86M | return_if_error_or_eof({});
1246 | | 
1247 | 1.86M | if (*cp != U'#')
1248 | 1.86M | return false;
1249 | | 
1250 | 4.53k | push_parse_scope("comment"sv);
1251 | | 
1252 | 4.53k | advance_and_return_if_error({}); // skip the '#'
1253 | | 
1254 | 208k | while (!is_eof())
1255 | 208k | {
// consume_line_break() returns false both for "not a line break" and after an
// error, hence the explicit error check right after it.
1256 | 208k | if (consume_line_break())
1257 | 4.27k | return true;
1258 | 203k | return_if_error({});
1259 | | 
1260 | 203k | #if TOML_LANG_AT_LEAST(1, 0, 0)
1261 | | 
1262 | | // toml/issues/567 (disallow non-TAB control characters in comments)
1263 | 203k | if TOML_UNLIKELY(is_nontab_control_character(*cp))
1264 | 203k | set_error_and_return_default(
1265 | 203k | "control characters other than TAB (U+0009) are explicitly prohibited in comments"sv);
1266 | | 
1267 | | // toml/pull/720 (disallow surrogates in comments)
1268 | 203k | else if TOML_UNLIKELY(is_unicode_surrogate(*cp))
1269 | 0 | set_error_and_return_default(
1270 | 203k | "unicode surrogates (U+D800 to U+DFFF) are explicitly prohibited in comments"sv);
1271 | 203k | #endif
1272 | | 
1273 | 203k | advance_and_return_if_error({});
1274 | 203k | }
1275 | | 
// Reached when the comment runs to end-of-file without a line break.
1276 | 252 | return true;
1277 | 4.53k | }
1278 | | |
// Matches the input against `seq` one codepoint at a time, advancing past every
// codepoint that matches. `seq` must not be empty (TOML_ASSERT).
// Returns true when the whole sequence matched and false at the first mismatch; the
// codepoints that matched before the mismatch stay consumed (nothing is rewound here).
// NOTE(review): set_error_and_return_if_eof() is defined outside this view;
// presumably it raises an error when the input ends mid-sequence - confirm.
1279 | | TOML_NODISCARD
1280 | | bool consume_expected_sequence(std::u32string_view seq)
1281 | 3.48k | {
1282 | 3.48k | return_if_error({});
1283 | 3.48k | TOML_ASSERT(!seq.empty());
1284 | | 
1285 | 3.48k | for (auto c : seq)
1286 | 12.7k | {
1287 | 12.7k | set_error_and_return_if_eof({});
1288 | 12.7k | if (*cp != c)
1289 | 43 | return false;
1290 | 12.6k | advance_and_return_if_error({});
1291 | 12.6k | }
1292 | 3.41k | return true;
1293 | 3.48k | }
1294 | | |
// Reads exactly `len` decimal digits from the input into digits[0 .. len-1], one
// numeric value (0-9) per element. Both `digits` and `len` must be non-null / non-zero
// (TOML_ASSERT_ASSUME).
// Returns true when all `len` digits were read and false at the first non-digit; the
// digits consumed before that point stay consumed.
// NOTE(review): set_error_and_return_if_eof() is defined outside this view;
// presumably it raises an error when the input ends before `len` digits - confirm.
// The final row below is fused with the per-instantiation coverage dump emitted by
// the report generator.
1295 | | template <typename T>
1296 | | TOML_NODISCARD
1297 | | bool consume_digit_sequence(T* digits, size_t len)
1298 | 45.9k | {
1299 | 45.9k | return_if_error({});
1300 | 45.9k | TOML_ASSERT_ASSUME(digits);
1301 | 45.9k | TOML_ASSERT_ASSUME(len);
1302 | | 
1303 | 151k | for (size_t i = 0; i < len; i++)
1304 | 105k | {
1305 | 105k | set_error_and_return_if_eof({});
1306 | 105k | if (!is_decimal_digit(*cp))
1307 | 216 | return false;
1308 | | 
1309 | 105k | digits[i] = static_cast<T>(*cp - U'0');
1310 | 105k | advance_and_return_if_error({});
1311 | 105k | }
1312 | 45.7k | return true;
1313 | 45.9k | } bool toml::v3::impl::impl_ex::parser::consume_digit_sequence<unsigned int>(unsigned int*, unsigned long) Line | Count | Source | 1298 | 41.6k | { | 1299 | 41.6k | return_if_error({}); | 1300 | 41.6k | TOML_ASSERT_ASSUME(digits); | 1301 | 41.6k | TOML_ASSERT_ASSUME(len); | 1302 | | | 1303 | 138k | for (size_t i = 0; i < len; i++) | 1304 | 97.4k | { | 1305 | 97.4k | set_error_and_return_if_eof({}); | 1306 | 97.4k | if (!is_decimal_digit(*cp)) | 1307 | 187 | return false; | 1308 | | | 1309 | 97.2k | digits[i] = static_cast<T>(*cp - U'0'); | 1310 | 97.2k | advance_and_return_if_error({}); | 1311 | 97.2k | } | 1312 | 41.4k | return true; | 1313 | 41.6k | } |
bool toml::v3::impl::impl_ex::parser::consume_digit_sequence<int>(int*, unsigned long) Line | Count | Source | 1298 | 4.23k | { | 1299 | 4.23k | return_if_error({}); | 1300 | 4.23k | TOML_ASSERT_ASSUME(digits); | 1301 | 4.23k | TOML_ASSERT_ASSUME(len); | 1302 | | | 1303 | 12.6k | for (size_t i = 0; i < len; i++) | 1304 | 8.45k | { | 1305 | 8.45k | set_error_and_return_if_eof({}); | 1306 | 8.45k | if (!is_decimal_digit(*cp)) | 1307 | 29 | return false; | 1308 | | | 1309 | 8.42k | digits[i] = static_cast<T>(*cp - U'0'); | 1310 | 8.42k | advance_and_return_if_error({}); | 1311 | 8.42k | } | 1312 | 4.20k | return true; | 1313 | 4.23k | } |
|
1314 | | |
// Reads up to `max_len` decimal digits into buffer[0 ..], one numeric value (0-9)
// per element, stopping early at end-of-file or at the first non-digit.
// Returns the number of digits stored, which may be zero. Unlike
// consume_digit_sequence(), running out of digits is not treated as an error here.
1315 | | template <typename T>
1316 | | TOML_NODISCARD
1317 | | size_t consume_variable_length_digit_sequence(T* buffer, size_t max_len)
1318 | 2.83k | {
1319 | 2.83k | return_if_error({});
1320 | 2.83k | TOML_ASSERT_ASSUME(buffer);
1321 | 2.83k | TOML_ASSERT_ASSUME(max_len);
1322 | | 
1323 | 2.83k | size_t i = {};
1324 | 35.3k | for (; i < max_len; i++)
1325 | 34.9k | {
1326 | 34.9k | if (is_eof() || !is_decimal_digit(*cp))
1327 | 2.48k | break;
1328 | | 
1329 | 32.5k | buffer[i] = static_cast<T>(*cp - U'0');
1330 | 32.5k | advance_and_return_if_error({});
1331 | 32.5k | }
1332 | 2.83k | return i;
1333 | 2.83k | }
1334 | | |
// Parses a basic (double-quoted) string whose opening delimiter is the current
// codepoint (asserted below).
//   multi_line - true when parsing the triple-quoted form; parse_string() calls this
//                with cp on the last of the three opening quotes in that case.
// Returns a view of string_buffer holding the decoded contents. The view refers to
// the shared string_buffer member, so it is only meaningful until that buffer is
// next modified.
// Errors are raised for unknown escapes, malformed or out-of-range unicode escapes,
// unescaped control characters and surrogates, misplaced line-ending backslashes and
// an unterminated string.
1335 | | TOML_NODISCARD
1336 | | TOML_NEVER_INLINE
1337 | | std::string_view parse_basic_string(bool multi_line)
1338 | 4.68k | {
1339 | 4.68k | return_if_error({});
1340 | 4.68k | assert_not_eof();
1341 | 4.68k | TOML_ASSERT_ASSUME(*cp == U'"');
1342 | 4.68k | push_parse_scope("string"sv);
1343 | | 
1344 | | // skip the '"'
1345 | 4.68k | advance_and_return_if_error_or_eof({});
1346 | | 
1347 | | // multi-line strings ignore a single line ending right at the beginning
1348 | 4.68k | if (multi_line)
1349 | 2.17k | {
1350 | 2.17k | consume_line_break();
1351 | 2.17k | return_if_error({});
1352 | 2.17k | set_error_and_return_if_eof({});
1353 | 2.17k | }
1354 | | 
// State carried across loop iterations:
//   escaped             - the previous codepoint was a backslash
//   skipping_whitespace - a line-ending backslash was seen; line breaks and horizontal
//                         whitespace are dropped until other content appears
1355 | 4.68k | auto& str = string_buffer;
1356 | 4.68k | str.clear();
1357 | 4.68k | bool escaped = false;
1358 | 4.68k | bool skipping_whitespace = false;
1359 | 4.68k | do
1360 | 8.26M | {
1361 | 8.26M | if (escaped)
1362 | 4.64k | {
1363 | 4.64k | escaped = false;
1364 | | 
1365 | | // handle 'line ending slashes' in multi-line mode
1366 | 4.64k | if (multi_line && is_whitespace(*cp))
1367 | 1.93k | {
1368 | 1.93k | consume_leading_whitespace();
1369 | | 
1370 | 1.93k | if TOML_UNLIKELY(!consume_line_break())
1371 | 1.93k | set_error_and_return_default(
1372 | 1.92k | "line-ending backslashes must be the last non-whitespace character on the line"sv);
1373 | | 
1374 | 1.92k | skipping_whitespace = true;
1375 | 1.92k | return_if_error({});
1376 | 1.92k | continue;
1377 | 1.93k | }
1378 | | 
// Single-character escapes leave cp on the escape letter and are skipped after the
// switch; the unicode cases consume their own digits and clear this flag.
1379 | 2.71k | bool skip_escaped_codepoint = true;
1380 | 2.71k | assert_not_eof();
1381 | 2.71k | switch (const auto escaped_codepoint = *cp)
1382 | 2.71k | {
1383 | | // 'regular' escape codes
1384 | 195 | case U'b': str += '\b'; break;
1385 | 226 | case U'f': str += '\f'; break;
1386 | 172 | case U'n': str += '\n'; break;
1387 | 198 | case U'r': str += '\r'; break;
1388 | 300 | case U't': str += '\t'; break;
1389 | 192 | case U'"': str += '"'; break;
1390 | 324 | case U'\\': str += '\\'; break;
1391 | | 
1392 | | #if TOML_LANG_UNRELEASED // toml/pull/790 (\e shorthand for \x1B)
1393 | | case U'e': str += '\x1B'; break;
1394 | | #else
1395 | 1 | case U'e':
1396 | 1 | set_error_and_return_default(
1397 | 0 | "escape sequence '\\e' is not supported in TOML 1.0.0 and earlier"sv);
1398 | 0 | #endif
1399 | | 
1400 | | #if TOML_LANG_UNRELEASED // toml/pull/796 (\xHH unicode scalar sequences)
1401 | | case U'x': [[fallthrough]];
1402 | | #else
1403 | 2 | case U'x':
1404 | 2 | set_error_and_return_default(
1405 | 0 | "escape sequence '\\x' is not supported in TOML 1.0.0 and earlier"sv);
1406 | 0 | #endif
1407 | | 
1408 | | // unicode scalar sequences
1409 | 711 | case U'u': [[fallthrough]];
1410 | 1.03k | case U'U':
1411 | 1.03k | {
1412 | 1.03k | push_parse_scope("unicode scalar sequence"sv);
1413 | 1.03k | advance_and_return_if_error_or_eof({});
1414 | 1.02k | skip_escaped_codepoint = false;
1415 | | 
// place_value is the weight of the most significant hex digit, which fixes the
// digit count: 8 digits for U, 4 for u, and 2 otherwise (the x form, reachable only
// through the TOML_LANG_UNRELEASED fallthrough above).
1416 | 1.02k | uint32_t place_value =
1417 | 1.02k | escaped_codepoint == U'U' ? 0x10000000u : (escaped_codepoint == U'u' ? 0x1000u : 0x10u);
1418 | 1.02k | uint32_t sequence_value{};
1419 | 6.19k | while (place_value)
1420 | 5.20k | {
1421 | 5.20k | set_error_and_return_if_eof({});
1422 | | 
1423 | 5.19k | if TOML_UNLIKELY(!is_hexadecimal_digit(*cp))
1424 | 5.19k | set_error_and_return_default("expected hex digit, saw '"sv, to_sv(*cp), "'"sv);
1425 | | 
1426 | 5.16k | sequence_value += place_value * hex_to_dec(*cp);
1427 | 5.16k | place_value /= 16u;
1428 | 5.16k | advance_and_return_if_error({});
1429 | 5.16k | }
1430 | | 
1431 | 987 | if TOML_UNLIKELY(is_unicode_surrogate(sequence_value))
1432 | 987 | set_error_and_return_default(
1433 | 984 | "unicode surrogates (U+D800 - U+DFFF) are explicitly prohibited"sv);
1434 | 984 | else if TOML_UNLIKELY(sequence_value > 0x10FFFFu)
1435 | 15 | set_error_and_return_default("values greater than U+10FFFF are invalid"sv);
1436 | | 
// Append the scalar value as UTF-8, using one to four bytes depending on magnitude.
1437 | 969 | if (sequence_value < 0x80)
1438 | 201 | {
1439 | 201 | str += static_cast<char>(sequence_value);
1440 | 201 | }
1441 | 768 | else if (sequence_value < 0x800u)
1442 | 220 | {
1443 | 220 | str += static_cast<char>((sequence_value >> 6) | 0xC0u);
1444 | 220 | str += static_cast<char>((sequence_value & 0x3Fu) | 0x80u);
1445 | 220 | }
1446 | 548 | else if (sequence_value < 0x10000u)
1447 | 294 | {
1448 | 294 | str += static_cast<char>((sequence_value >> 12) | 0xE0u);
1449 | 294 | str += static_cast<char>(((sequence_value >> 6) & 0x3Fu) | 0x80u);
1450 | 294 | str += static_cast<char>((sequence_value & 0x3Fu) | 0x80u);
1451 | 294 | }
1452 | 254 | else if (sequence_value < 0x110000u)
1453 | 251 | {
1454 | 251 | str += static_cast<char>((sequence_value >> 18) | 0xF0u);
1455 | 251 | str += static_cast<char>(((sequence_value >> 12) & 0x3Fu) | 0x80u);
1456 | 251 | str += static_cast<char>(((sequence_value >> 6) & 0x3Fu) | 0x80u);
1457 | 251 | str += static_cast<char>((sequence_value & 0x3Fu) | 0x80u);
1458 | 251 | }
1459 | 969 | break;
1460 | 987 | }
1461 | | 
1462 | | // ???
1463 | 0 | TOML_UNLIKELY_CASE
1464 | 68 | default: set_error_and_return_default("unknown escape sequence '\\"sv, to_sv(*cp), "'"sv);
1465 | 2.71k | }
1466 | | 
1467 | 2.57k | if (skip_escaped_codepoint)
1468 | 1.60k | advance_and_return_if_error_or_eof({});
1469 | 2.57k | }
1470 | 8.26M | else
1471 | 8.26M | {
1472 | | // handle closing delimiters
1473 | 8.26M | if (*cp == U'"')
1474 | 5.65k | {
1475 | 5.65k | if (multi_line)
1476 | 3.33k | {
// Count how many quotes appear in a row (at most five). The loop advances past each
// quote it counts, with one exception: when five are found it stops with cp still
// on the fifth quote, which is why case 5 below advances once more.
1477 | 3.33k | size_t lookaheads = {};
1478 | 3.33k | size_t consecutive_delimiters = 1;
1479 | 3.33k | do
1480 | 8.83k | {
1481 | 8.83k | advance_and_return_if_error({});
1482 | 8.83k | lookaheads++;
1483 | 8.83k | if (!is_eof() && *cp == U'"')
1484 | 6.07k | consecutive_delimiters++;
1485 | 2.75k | else
1486 | 2.75k | break;
1487 | 8.83k | }
1488 | 6.07k | while (lookaheads < 4u);
1489 | | 
1490 | 3.33k | switch (consecutive_delimiters)
1491 | 3.33k | {
1492 | | // """ " (one quote somewhere in a ML string)
1493 | 877 | case 1:
1494 | 877 | str += '"';
1495 | 877 | skipping_whitespace = false;
1496 | 877 | continue;
1497 | | 
1498 | | // """ "" (two quotes somewhere in a ML string)
1499 | 560 | case 2:
1500 | 560 | str.append("\"\""sv);
1501 | 560 | skipping_whitespace = false;
1502 | 560 | continue;
1503 | | 
1504 | | // """ """ (the end of the string)
1505 | 744 | case 3: return str;
1506 | | 
1507 | | // """ """" (one at the end of the string)
1508 | 569 | case 4: str += '"'; return str;
1509 | | 
1510 | | // """ """"" (two quotes at the end of the string)
1511 | 581 | case 5:
1512 | 581 | str.append("\"\""sv);
1513 | 581 | advance_and_return_if_error({}); // skip the last '"'
1514 | 581 | return str;
1515 | | 
1516 | 0 | default: TOML_UNREACHABLE;
1517 | 3.33k | }
1518 | 3.33k | }
1519 | 2.32k | else
1520 | 2.32k | {
1521 | 2.32k | advance_and_return_if_error({}); // skip the closing delimiter
1522 | 2.32k | return str;
1523 | 2.32k | }
1524 | 5.65k | }
1525 | | 
1526 | | // handle escapes
1527 | 8.25M | else if (*cp == U'\\')
1528 | 4.65k | {
1529 | 4.65k | advance_and_return_if_error_or_eof({}); // skip the '\'
1530 | 4.64k | skipping_whitespace = false;
1531 | 4.64k | escaped = true;
1532 | 4.64k | continue;
1533 | 4.65k | }
1534 | | 
1535 | | // handle line endings in multi-line mode
1536 | 8.25M | if (multi_line && is_ascii_vertical_whitespace(*cp))
1537 | 1.03M | {
// Line breaks are stored as a single LF, and are dropped entirely while skipping
// whitespace after a line-ending backslash.
1538 | 1.03M | consume_line_break();
1539 | 1.03M | return_if_error({});
1540 | 1.03M | if (!skipping_whitespace)
1541 | 1.02M | str += '\n';
1542 | 1.03M | continue;
1543 | 1.03M | }
1544 | | 
1545 | | // handle control characters
1546 | 7.21M | if TOML_UNLIKELY(is_nontab_control_character(*cp))
1547 | 7.21M | set_error_and_return_default(
1548 | 7.21M | "unescaped control characters other than TAB (U+0009) are explicitly prohibited"sv);
1549 | | 
1550 | 7.21M | #if TOML_LANG_AT_LEAST(1, 0, 0)
1551 | | 
1552 | | // handle surrogates in strings
1553 | 7.21M | if TOML_UNLIKELY(is_unicode_surrogate(*cp))
1554 | 7.21M | set_error_and_return_default(
1555 | 7.21M | "unescaped unicode surrogates (U+D800 to U+DFFF) are explicitly prohibited"sv);
1556 | 7.21M | #endif
1557 | | 
1558 | 7.21M | if (multi_line)
1559 | 72.8k | {
// Horizontal whitespace is dropped while skipping_whitespace is set; any other
// codepoint ends the skipping and is appended.
1560 | 72.8k | if (!skipping_whitespace || !is_horizontal_whitespace(*cp))
1561 | 70.4k | {
1562 | 70.4k | skipping_whitespace = false;
1563 | 70.4k | str.append(cp->bytes, cp->count);
1564 | 70.4k | }
1565 | 72.8k | }
1566 | 7.14M | else
1567 | 7.14M | str.append(cp->bytes, cp->count);
1568 | | 
1569 | 7.21M | advance_and_return_if_error({});
1570 | 7.21M | }
1571 | 8.26M | }
1572 | 8.26M | while (!is_eof());
1573 | | 
// Falling out of the loop means the input ended before a closing delimiter.
1574 | 257 | set_error_and_return_default("encountered end-of-file"sv);
1575 | 257 | }
1576 | | |
// Parses a literal (single-quoted) string whose opening delimiter is the current
// codepoint (asserted below). No escape processing is performed.
//   multi_line - true when parsing the triple-quoted form; parse_string() calls this
//                with cp on the last of the three opening quotes in that case.
// Returns a view of string_buffer holding the contents. The view refers to the shared
// string_buffer member, so it is only meaningful until that buffer is next modified.
// Errors are raised for control characters other than TAB, for unicode surrogates
// (when TOML_LANG_AT_LEAST(1, 0, 0) holds) and for an unterminated string.
1577 | | TOML_NODISCARD
1578 | | TOML_NEVER_INLINE
1579 | | std::string_view parse_literal_string(bool multi_line)
1580 | 5.90k | {
1581 | 5.90k | return_if_error({});
1582 | 5.90k | assert_not_eof();
1583 | 5.90k | TOML_ASSERT_ASSUME(*cp == U'\'');
1584 | 5.90k | push_parse_scope("literal string"sv);
1585 | | 
1586 | | // skip the delimiter
1587 | 5.90k | advance_and_return_if_error_or_eof({});
1588 | | 
1589 | | // multi-line strings ignore a single line ending right at the beginning
1590 | 5.89k | if (multi_line)
1591 | 2.96k | {
1592 | 2.96k | consume_line_break();
1593 | 2.96k | return_if_error({});
1594 | 2.96k | set_error_and_return_if_eof({});
1595 | 2.96k | }
1596 | | 
1597 | 5.89k | auto& str = string_buffer;
1598 | 5.89k | str.clear();
1599 | 5.89k | do
1600 | 19.4M | {
1601 | 19.4M | return_if_error({});
1602 | | 
1603 | | // handle closing delimiters
1604 | 19.4M | if (*cp == U'\'')
1605 | 8.00k | {
1606 | 8.00k | if (multi_line)
1607 | 5.11k | {
// Count how many quotes appear in a row (at most five). The loop advances past each
// quote it counts, with one exception: when five are found it stops with cp still
// on the fifth quote, which is why case 5 below advances once more.
1608 | 5.11k | size_t lookaheads = {};
1609 | 5.11k | size_t consecutive_delimiters = 1;
1610 | 5.11k | do
1611 | 13.5k | {
1612 | 13.5k | advance_and_return_if_error({});
1613 | 13.5k | lookaheads++;
1614 | 13.5k | if (!is_eof() && *cp == U'\'')
1615 | 9.19k | consecutive_delimiters++;
1616 | 4.34k | else
1617 | 4.34k | break;
1618 | 13.5k | }
1619 | 9.19k | while (lookaheads < 4u);
1620 | | 
1621 | 5.11k | switch (consecutive_delimiters)
1622 | 5.11k | {
1623 | | // ''' ' (one quote somewhere in a ML string)
1624 | 1.80k | case 1: str += '\''; continue;
1625 | | 
1626 | | // ''' '' (two quotes somewhere in a ML string)
1627 | 394 | case 2: str.append("''"sv); continue;
1628 | | 
1629 | | // ''' ''' (the end of the string)
1630 | 699 | case 3: return str;
1631 | | 
1632 | | // ''' '''' (one at the end of the string)
1633 | 1.44k | case 4: str += '\''; return str;
1634 | | 
1635 | | // ''' ''''' (two quotes at the end of the string)
1636 | 767 | case 5:
1637 | 767 | str.append("''"sv);
1638 | 767 | advance_and_return_if_error({}); // skip the last '
1639 | 767 | return str;
1640 | | 
1641 | 0 | default: TOML_UNREACHABLE;
1642 | 5.11k | }
1643 | 5.11k | }
1644 | 2.89k | else
1645 | 2.89k | {
1646 | 2.89k | advance_and_return_if_error({}); // skip the closing delimiter
1647 | 2.89k | return str;
1648 | 2.89k | }
1649 | 8.00k | }
1650 | | 
1651 | | // handle line endings in multi-line mode
1652 | 19.4M | if (multi_line && is_ascii_vertical_whitespace(*cp))
1653 | 410k | {
// Whatever form the line break took in the input, it is stored as a single LF.
1654 | 410k | consume_line_break();
1655 | 410k | return_if_error({});
1656 | 410k | str += '\n';
1657 | 410k | continue;
1658 | 410k | }
1659 | | 
1660 | | // handle control characters
1661 | 19.0M | if TOML_UNLIKELY(is_nontab_control_character(*cp))
1662 | 19.0M | set_error_and_return_default(
1663 | 19.0M | "control characters other than TAB (U+0009) are explicitly prohibited"sv);
1664 | | 
1665 | 19.0M | #if TOML_LANG_AT_LEAST(1, 0, 0)
1666 | | 
1667 | | // handle surrogates in strings
1668 | 19.0M | if TOML_UNLIKELY(is_unicode_surrogate(*cp))
1669 | 19.0M | set_error_and_return_default("unicode surrogates (U+D800 - U+DFFF) are explicitly prohibited"sv);
1670 | 19.0M | #endif
1671 | | 
1672 | 19.0M | str.append(cp->bytes, cp->count);
1673 | 19.0M | advance_and_return_if_error({});
1674 | 19.0M | }
1675 | 19.4M | while (!is_eof());
1676 | | 
// Falling out of the loop means the input ended before a closing delimiter.
1677 | 77 | set_error_and_return_default("encountered end-of-file"sv);
1678 | 77 | }
1679 | | |
// Entry point for string values: the current codepoint must be a string delimiter
// (asserted below). Looks at up to three codepoints to tell the single-line and
// multi-line forms apart, then delegates to parse_literal_string() or
// parse_basic_string() depending on which delimiter came first.
// Returns a parsed_string built from the delegate's result and a flag that is true
// for the multi-line form, or a value-initialised parsed_string for an empty string
// that ends exactly at end-of-file.
1680 | | TOML_NODISCARD
1681 | | TOML_NEVER_INLINE
1682 | | parsed_string parse_string()
1683 | 10.6k | {
1684 | 10.6k | return_if_error({});
1685 | 10.6k | assert_not_eof();
1686 | 10.6k | TOML_ASSERT_ASSUME(is_string_delimiter(*cp));
1687 | 10.6k | push_parse_scope("string"sv);
1688 | | 
1689 | | // snapshot length so the recording buffer can be rewound alongside go_back(2u) below
1690 | 10.6k | const auto recording_buffer_rollback_size = recording_buffer.length();
1691 | | 
1692 | | // get the first three characters to determine the string type
1693 | 10.6k | const auto first = cp->value;
1694 | 10.6k | advance_and_return_if_error_or_eof({});
1695 | 10.6k | const auto second = cp->value;
1696 | 10.6k | advance_and_return_if_error({});
1697 | 10.6k | const auto third = cp ? cp->value : U'\0';
1698 | | 
1699 | | // if we were eof at the third character then first and second need to be
1700 | | // the same string character (otherwise it's an unterminated string)
1701 | 10.6k | if (is_eof())
1702 | 47 | {
1703 | 47 | if (second == first)
1704 | 31 | return {};
1705 | | 
1706 | 16 | set_error_and_return_default("encountered end-of-file"sv);
1707 | 16 | }
1708 | | 
1709 | | // if the first three characters are all the same string delimiter then
1710 | | // it's a multi-line string.
1711 | 10.5k | else if (first == second && first == third)
1712 | 5.14k | {
1713 | 5.14k | return { first == U'\'' ? parse_literal_string(true) : parse_basic_string(true), true };
1714 | 5.14k | }
1715 | | 
1716 | | // otherwise it's just a regular string.
1717 | 5.43k | else
1718 | 5.43k | {
1719 | | // step back two characters so that the current
1720 | | // character is the string delimiter
1721 | 5.43k | go_back(2u);
// advance() appended the two look-ahead codepoints to recording_buffer while
// recording was active; truncating to the snapshot stops them being recorded a
// second time when the delegate re-reads them.
1722 | 5.43k | if (recording)
1723 | 3.48k | recording_buffer.resize(recording_buffer_rollback_size);
1724 | | 
1725 | 5.43k | return { first == U'\'' ? parse_literal_string(false) : parse_basic_string(false), false };
1726 | 5.43k | }
1727 | 10.6k | }
1728 | | |
// Collects a run of bare-key characters, starting with the current codepoint (which
// must itself be a bare-key character - asserted below), into string_buffer.
// Stops at the first codepoint that is not a bare-key character or at end-of-file;
// that codepoint is left unconsumed.
// Returns a view of string_buffer, so it is only meaningful until that buffer is
// next modified.
1729 | | TOML_NODISCARD
1730 | | TOML_NEVER_INLINE
1731 | | std::string_view parse_bare_key_segment()
1732 | 693k | {
1733 | 693k | return_if_error({});
1734 | 693k | assert_not_eof();
1735 | 693k | TOML_ASSERT_ASSUME(is_bare_key_character(*cp));
1736 | | 
1737 | 693k | string_buffer.clear();
1738 | | 
1739 | 11.1M | while (!is_eof())
1740 | 11.1M | {
1741 | 11.1M | if (!is_bare_key_character(*cp))
1742 | 693k | break;
1743 | | 
1744 | 10.4M | string_buffer.append(cp->bytes, cp->count);
1745 | 10.4M | advance_and_return_if_error({});
1746 | 10.4M | }
1747 | | 
1748 | 693k | return string_buffer;
1749 | 693k | }
1750 | | |
// Parses a boolean literal. The current codepoint must be one of t, f, T or F
// (asserted below); the first letter selects which literal is expected, and the
// input must then match the lowercase sequence true or false exactly.
// Returns the parsed value. An error is raised when the sequence does not match or
// when the literal is followed by something other than a value terminator.
1751 | | TOML_NODISCARD
1752 | | TOML_NEVER_INLINE
1753 | | bool parse_boolean()
1754 | 1.71k | {
1755 | 1.71k | return_if_error({});
1756 | 1.71k | assert_not_eof();
1757 | 1.71k | TOML_ASSERT_ASSUME(is_match(*cp, U't', U'f', U'T', U'F'));
1758 | 1.71k | push_parse_scope("boolean"sv);
1759 | | 
// Recording captures the consumed text so a mismatch can be quoted in the error.
1760 | 1.71k | start_recording(true);
1761 | 1.71k | auto result = is_match(*cp, U't', U'T');
1762 | 1.71k | if (!consume_expected_sequence(result ? U"true"sv : U"false"sv))
1763 | 1.71k | set_error_and_return_default("expected '"sv,
1764 | 1.69k | to_sv(result),
1765 | 1.69k | "', saw '"sv,
1766 | 1.69k | to_sv(recording_buffer),
1767 | 1.69k | "'"sv);
1768 | 1.69k | stop_recording();
1769 | | 
// A null cp (end of input) is accepted as a terminator.
1770 | 1.69k | if (cp && !is_value_terminator(*cp))
1771 | 1.69k | set_error_and_return_default("expected value-terminator, saw '"sv, to_sv(*cp), "'"sv);
1772 | | 
1773 | 1.68k | return result;
1774 | 1.69k | }
1775 | | |
// Parses the special floating-point literals inf and nan with an optional leading
// sign. The current codepoint must be one of i, n, I, N, + or - (asserted below); the
// letters themselves must match the lowercase sequence exactly.
// Returns positive or negative infinity for inf, and quiet_NaN for nan (the sign is
// not applied to the NaN result).
// An error is raised when the sequence does not match or when the literal is followed
// by something other than a value terminator.
1776 | | TOML_NODISCARD
1777 | | TOML_NEVER_INLINE
1778 | | double parse_inf_or_nan()
1779 | 1.77k | {
1780 | 1.77k | return_if_error({});
1781 | 1.77k | assert_not_eof();
1782 | 1.77k | TOML_ASSERT_ASSUME(is_match(*cp, U'i', U'n', U'I', U'N', U'+', U'-'));
1783 | 1.77k | push_parse_scope("floating-point"sv);
1784 | | 
// Recording captures the consumed text so a mismatch can be quoted in the error.
1785 | 1.77k | start_recording(true);
1786 | 1.77k | const bool negative = *cp == U'-';
1787 | 1.77k | if (negative || *cp == U'+')
1788 | 533 | advance_and_return_if_error_or_eof({});
1789 | | 
1790 | 1.77k | const bool inf = is_match(*cp, U'i', U'I');
1791 | 1.77k | if (!consume_expected_sequence(inf ? U"inf"sv : U"nan"sv))
1792 | 1.77k | set_error_and_return_default("expected '"sv,
1793 | 1.74k | inf ? "inf"sv : "nan"sv,
1794 | 1.74k | "', saw '"sv,
1795 | 1.74k | to_sv(recording_buffer),
1796 | 1.74k | "'"sv);
1797 | 1.74k | stop_recording();
1798 | | 
// A null cp (end of input) is accepted as a terminator.
1799 | 1.74k | if (cp && !is_value_terminator(*cp))
1800 | 1.74k | set_error_and_return_default("expected value-terminator, saw '"sv, to_sv(*cp), "'"sv);
1801 | | 
1802 | 1.72k | return inf ? (negative ? -std::numeric_limits<double>::infinity() : std::numeric_limits<double>::infinity())
1803 | 1.72k | : std::numeric_limits<double>::quiet_NaN();
1804 | 1.74k | }
1805 | | |
// Parses a decimal floating-point literal. The current codepoint must be +, -, . or a
// decimal digit (asserted below).
// The leading sign is consumed separately. The remaining characters up to the next
// value terminator are validated one by one and copied, without underscores, into a
// local buffer. That buffer is then converted with std::from_chars when
// TOML_FLOAT_CHARCONV is set, or with a std::stringstream imbued with the classic
// locale otherwise.
// Returns the converted value multiplied by the sign. Errors are raised for misplaced
// underscores, a missing integer part, leading zeroes, repeated or misplaced dots,
// exponents and signs, over-long literals and conversion failures.
1806 | | TOML_NODISCARD
1807 | | TOML_NEVER_INLINE
1808 | | double parse_float()
1809 | 7.43k | {
1810 | 7.43k | return_if_error({});
1811 | 7.43k | assert_not_eof();
1812 | 7.43k | TOML_ASSERT_ASSUME(is_match(*cp, U'+', U'-', U'.') || is_decimal_digit(*cp));
1813 | 7.43k | push_parse_scope("floating-point"sv);
1814 | | 
1815 | | // sign
1816 | 7.43k | const int sign = *cp == U'-' ? -1 : 1;
1817 | 7.43k | if (is_match(*cp, U'+', U'-'))
1818 | 1.90k | advance_and_return_if_error_or_eof({});
1819 | | 
1820 | | // consume value chars
// State for the scan:
//   chars / length     - accepted characters (underscores excluded)
//   prev               - previously consumed codepoint, underscores included
//   seen_decimal       - a dot or an exponent marker has been seen
//   seen_exponent      - an exponent marker has been seen
//   first_integer_part - first digit of the integer part, NUL until one is seen
1821 | 7.43k | char chars[utf8_buffered_reader::max_history_length];
1822 | 7.43k | size_t length = {};
1823 | 7.43k | const utf8_codepoint* prev = {};
1824 | 7.43k | bool seen_decimal = false, seen_exponent = false;
1825 | 7.43k | char first_integer_part = '\0';
1826 | 47.4k | while (!is_eof() && !is_value_terminator(*cp))
1827 | 40.1k | {
1828 | 40.1k | if (*cp == U'_')
1829 | 738 | {
1830 | 738 | if (!prev || !is_decimal_digit(*prev))
1831 | 738 | set_error_and_return_default("underscores may only follow digits"sv);
1832 | | 
1833 | 731 | prev = cp;
1834 | 731 | advance_and_return_if_error_or_eof({});
1835 | 730 | continue;
1836 | 731 | }
1837 | 39.4k | else if TOML_UNLIKELY(prev && *prev == U'_' && !is_decimal_digit(*cp))
1838 | 39.4k | set_error_and_return_default("underscores must be followed by digits"sv);
1839 | 39.3k | else if TOML_UNLIKELY(length == sizeof(chars))
1840 | 39.3k | set_error_and_return_default("exceeds length limit of "sv,
1841 | 39.3k | sizeof(chars),
1842 | 39.3k | " digits"sv,
1843 | 39.3k | (seen_exponent ? ""sv : " (consider using exponent notation)"sv));
1844 | 39.3k | else if (*cp == U'.')
1845 | 6.14k | {
1846 | | // .1
1847 | | // -.1
1848 | | // +.1 (no integer part)
1849 | 6.14k | if (!first_integer_part)
1850 | 6.14k | set_error_and_return_default("expected decimal digit, saw '.'"sv);
1851 | | 
1852 | | // 1.0e+.10 (exponent cannot have '.')
1853 | 6.13k | else if (seen_exponent)
1854 | 6.13k | set_error_and_return_default("expected exponent decimal digit or sign, saw '.'"sv);
1855 | | 
1856 | | // 1.0.e+.10
1857 | | // 1..0
1858 | | // (multiple '.')
1859 | 6.12k | else if (seen_decimal)
1860 | 2 | set_error_and_return_default("expected decimal digit or exponent, saw '.'"sv);
1861 | | 
1862 | 6.12k | seen_decimal = true;
1863 | 6.12k | }
1864 | 33.2k | else if (is_match(*cp, U'e', U'E'))
1865 | 1.28k | {
1866 | 1.28k | if (prev && !is_decimal_digit(*prev))
1867 | 1.28k | set_error_and_return_default("expected decimal digit, saw '"sv, to_sv(*cp), "'"sv);
1868 | | 
1869 | | // 1.0ee+10 (multiple 'e')
1870 | 1.28k | else if (seen_exponent)
1871 | 1 | set_error_and_return_default("expected decimal digit, saw '"sv, to_sv(*cp), "'"sv);
1872 | | 
1873 | 1.27k | seen_decimal = true; // implied
1874 | 1.27k | seen_exponent = true;
1875 | 1.27k | }
1876 | 31.9k | else if (is_match(*cp, U'+', U'-'))
1877 | 641 | {
1878 | | // 1.-0 (sign in mantissa)
1879 | 641 | if (!seen_exponent)
1880 | 641 | set_error_and_return_default("expected decimal digit or '.', saw '"sv, to_sv(*cp), "'"sv);
1881 | | 
1882 | | // 1.0e1-0 (misplaced exponent sign)
1883 | 640 | else if (!is_match(*prev, U'e', U'E'))
1884 | 3 | set_error_and_return_default("expected exponent digit, saw '"sv, to_sv(*cp), "'"sv);
1885 | 641 | }
1886 | 31.3k | else if (is_decimal_digit(*cp))
1887 | 31.2k | {
// Leading-zero check: applies only to the integer part, i.e. before any dot or
// exponent marker has been seen.
1888 | 31.2k | if (!seen_decimal)
1889 | 16.8k | {
1890 | 16.8k | if (!first_integer_part)
1891 | 7.41k | first_integer_part = static_cast<char>(cp->bytes[0]);
1892 | 9.46k | else if (first_integer_part == '0')
1893 | 1 | set_error_and_return_default("leading zeroes are prohibited"sv);
1894 | 16.8k | }
1895 | 31.2k | }
1896 | 39 | else
1897 | 39 | set_error_and_return_default("expected decimal digit, saw '"sv, to_sv(*cp), "'"sv);
1898 | | 
// Every accepted codepoint is copied by its first byte only.
1899 | 39.3k | chars[length++] = static_cast<char>(cp->bytes[0]);
1900 | 39.3k | prev = cp;
1901 | 39.3k | advance_and_return_if_error({});
1902 | 39.3k | }
1903 | | 
1904 | | // sanity-check ending state
// NOTE(review): set_error_and_return_if_eof() is defined outside this view;
// presumably it reports end-of-file first, so the messages below apply only when a
// terminator followed the dangling character - confirm.
1905 | 7.35k | if (prev)
1906 | 7.34k | {
1907 | 7.34k | if (*prev == U'_')
1908 | 1 | {
1909 | 1 | set_error_and_return_if_eof({});
1910 | 1 | set_error_and_return_default("underscores must be followed by digits"sv);
1911 | 1 | }
1912 | 7.34k | else if (is_match(*prev, U'e', U'E', U'+', U'-', U'.'))
1913 | 34 | {
1914 | 34 | set_error_and_return_if_eof({});
1915 | 12 | set_error_and_return_default("expected decimal digit, saw '"sv, to_sv(*cp), "'"sv);
1916 | 12 | }
1917 | 7.34k | }
1918 | | 
1919 | | // convert to double
1920 | 7.31k | double result;
1921 | | #if TOML_FLOAT_CHARCONV
1922 | | {
1923 | | auto fc_result = std::from_chars(chars, chars + length, result);
1924 | | switch (fc_result.ec)
1925 | | {
1926 | | TOML_LIKELY_CASE
1927 | | case std::errc{}: // ok
1928 | | return result * sign;
1929 | | 
1930 | | case std::errc::invalid_argument:
1931 | | set_error_and_return_default("'"sv,
1932 | | std::string_view{ chars, length },
1933 | | "' could not be interpreted as a value"sv);
1934 | | break;
1935 | | 
1936 | | case std::errc::result_out_of_range:
1937 | | set_error_and_return_default("'"sv,
1938 | | std::string_view{ chars, length },
1939 | | "' is not representable in 64 bits"sv);
1940 | | break;
1941 | | 
1942 | | default: //??
1943 | | set_error_and_return_default("an unspecified error occurred while trying to interpret '"sv,
1944 | | std::string_view{ chars, length },
1945 | | "' as a value"sv);
1946 | | }
1947 | | }
1948 | | #else
1949 | 7.31k | {
// Fallback path; the classic locale is imbued before extraction.
1950 | 7.31k | std::stringstream ss;
1951 | 7.31k | ss.imbue(std::locale::classic());
1952 | 7.31k | ss.write(chars, static_cast<std::streamsize>(length));
1953 | 7.31k | if ((ss >> result))
1954 | 7.31k | return result * sign;
1955 | 6 | else
1956 | 7.31k | set_error_and_return_default("'"sv,
1957 | 7.31k | std::string_view{ chars, length },
1958 | 7.31k | "' could not be interpreted as a value"sv);
1959 | 7.31k | }
1960 | 7.31k | #endif
1961 | 7.31k | }
1962 | | |
1963 | | TOML_NODISCARD |
1964 | | TOML_NEVER_INLINE |
1965 | | double parse_hex_float() |
1966 | 6 | { |
1967 | 6 | return_if_error({}); |
1968 | 6 | assert_not_eof(); |
1969 | 6 | TOML_ASSERT_ASSUME(is_match(*cp, U'0', U'+', U'-')); |
1970 | 6 | push_parse_scope("hexadecimal floating-point"sv); |
1971 | | |
1972 | | #if TOML_LANG_UNRELEASED // toml/issues/562 (hexfloats) |
1973 | | |
1974 | | // sign |
1975 | | const int sign = *cp == U'-' ? -1 : 1; |
1976 | | if (is_match(*cp, U'+', U'-')) |
1977 | | advance_and_return_if_error_or_eof({}); |
1978 | | |
1979 | | // '0' |
1980 | | if (*cp != U'0') |
1981 | | set_error_and_return_default(" expected '0', saw '"sv, to_sv(*cp), "'"sv); |
1982 | | advance_and_return_if_error_or_eof({}); |
1983 | | |
1984 | | // 'x' or 'X' |
1985 | | if (!is_match(*cp, U'x', U'X')) |
1986 | | set_error_and_return_default("expected 'x' or 'X', saw '"sv, to_sv(*cp), "'"sv); |
1987 | | advance_and_return_if_error_or_eof({}); |
1988 | | |
1989 | | // <HEX DIGITS> ([.]<HEX DIGITS>)? [pP] [+-]? <DEC DIGITS> |
1990 | | |
1991 | | // consume value fragments |
1992 | | struct fragment |
1993 | | { |
1994 | | char chars[24]; |
1995 | | size_t length; |
1996 | | double value; |
1997 | | }; |
1998 | | fragment fragments[] = { |
1999 | | {}, // mantissa, whole part |
2000 | | {}, // mantissa, fractional part |
2001 | | {} // exponent |
2002 | | }; |
2003 | | fragment* current_fragment = fragments; |
2004 | | const utf8_codepoint* prev = {}; |
2005 | | int exponent_sign = 1; |
2006 | | while (!is_eof() && !is_value_terminator(*cp)) |
2007 | | { |
2008 | | if (*cp == U'_') |
2009 | | { |
2010 | | if (!prev || !is_hexadecimal_digit(*prev)) |
2011 | | set_error_and_return_default("underscores may only follow digits"sv); |
2012 | | |
2013 | | prev = cp; |
2014 | | advance_and_return_if_error_or_eof({}); |
2015 | | continue; |
2016 | | } |
2017 | | else if (prev && *prev == U'_' && !is_hexadecimal_digit(*cp)) |
2018 | | set_error_and_return_default("underscores must be followed by digits"sv); |
2019 | | else if (*cp == U'.') |
2020 | | { |
2021 | | // 0x10.0p-.0 (exponent cannot have '.') |
2022 | | if (current_fragment == fragments + 2) |
2023 | | set_error_and_return_default("expected exponent digit or sign, saw '.'"sv); |
2024 | | |
2025 | | // 0x10.0.p-0 (multiple '.') |
2026 | | else if (current_fragment == fragments + 1) |
2027 | | set_error_and_return_default("expected hexadecimal digit or exponent, saw '.'"sv); |
2028 | | |
2029 | | else |
2030 | | current_fragment++; |
2031 | | } |
2032 | | else if (is_match(*cp, U'p', U'P')) |
2033 | | { |
2034 | | // 0x10.0pp-0 (multiple 'p') |
2035 | | if (current_fragment == fragments + 2) |
2036 | | set_error_and_return_default("expected exponent digit or sign, saw '"sv, to_sv(*cp), "'"sv); |
2037 | | |
2038 | | // 0x.p-0 (mantissa is just '.') |
2039 | | else if (fragments[0].length == 0u && fragments[1].length == 0u) |
2040 | | set_error_and_return_default("expected hexadecimal digit, saw '"sv, to_sv(*cp), "'"sv); |
2041 | | |
2042 | | else |
2043 | | current_fragment = fragments + 2; |
2044 | | } |
2045 | | else if (is_match(*cp, U'+', U'-')) |
2046 | | { |
2047 | | // 0x-10.0p-0 (sign in mantissa) |
2048 | | if (current_fragment != fragments + 2) |
2049 | | set_error_and_return_default("expected hexadecimal digit or '.', saw '"sv, to_sv(*cp), "'"sv); |
2050 | | |
2051 | | // 0x10.0p0- (misplaced exponent sign) |
2052 | | else if (!is_match(*prev, U'p', U'P')) |
2053 | | set_error_and_return_default("expected exponent digit, saw '"sv, to_sv(*cp), "'"sv); |
2054 | | |
2055 | | else |
2056 | | exponent_sign = *cp == U'-' ? -1 : 1; |
2057 | | } |
2058 | | else if (current_fragment < fragments + 2 && !is_hexadecimal_digit(*cp)) |
2059 | | set_error_and_return_default("expected hexadecimal digit or '.', saw '"sv, to_sv(*cp), "'"sv); |
2060 | | else if (current_fragment == fragments + 2 && !is_decimal_digit(*cp)) |
2061 | | set_error_and_return_default("expected exponent digit or sign, saw '"sv, to_sv(*cp), "'"sv); |
2062 | | else if (current_fragment->length == sizeof(fragment::chars)) |
2063 | | set_error_and_return_default("fragment exceeeds maximum length of "sv, |
2064 | | sizeof(fragment::chars), |
2065 | | " characters"sv); |
2066 | | else |
2067 | | current_fragment->chars[current_fragment->length++] = static_cast<char>(cp->bytes[0]); |
2068 | | |
2069 | | prev = cp; |
2070 | | advance_and_return_if_error({}); |
2071 | | } |
2072 | | |
2073 | | // sanity-check ending state |
2074 | | if (current_fragment != fragments + 2 || current_fragment->length == 0u) |
2075 | | { |
2076 | | set_error_and_return_if_eof({}); |
2077 | | set_error_and_return_default("missing exponent"sv); |
2078 | | } |
2079 | | else if (prev && *prev == U'_') |
2080 | | { |
2081 | | set_error_and_return_if_eof({}); |
2082 | | set_error_and_return_default("underscores must be followed by digits"sv); |
2083 | | } |
2084 | | |
2085 | | // calculate values for the three fragments |
2086 | | for (int fragment_idx = 0; fragment_idx < 3; fragment_idx++) |
2087 | | { |
2088 | | auto& f = fragments[fragment_idx]; |
2089 | | const uint32_t base = fragment_idx == 2 ? 10u : 16u; |
2090 | | |
2091 | | // left-trim zeroes |
2092 | | const char* c = f.chars; |
2093 | | size_t sig = {}; |
2094 | | while (f.length && *c == '0') |
2095 | | { |
2096 | | f.length--; |
2097 | | c++; |
2098 | | sig++; |
2099 | | } |
2100 | | if (!f.length) |
2101 | | continue; |
2102 | | |
2103 | | // calculate value |
2104 | | auto place = 1u; |
2105 | | for (size_t i = 0; i < f.length - 1u; i++) |
2106 | | place *= base; |
2107 | | uint32_t val{}; |
2108 | | while (place) |
2109 | | { |
2110 | | if (base == 16) |
2111 | | val += place * hex_to_dec(*c); |
2112 | | else |
2113 | | val += place * static_cast<uint32_t>(*c - '0'); |
2114 | | if (fragment_idx == 1) |
2115 | | sig++; |
2116 | | c++; |
2117 | | place /= base; |
2118 | | } |
2119 | | f.value = static_cast<double>(val); |
2120 | | |
2121 | | // shift the fractional part |
2122 | | if (fragment_idx == 1) |
2123 | | { |
2124 | | while (sig--) |
2125 | | f.value /= base; |
2126 | | } |
2127 | | } |
2128 | | |
2129 | | return (fragments[0].value + fragments[1].value) * pow(2.0, fragments[2].value * exponent_sign) * sign; |
2130 | | |
2131 | | #else // !TOML_LANG_UNRELEASED |
2132 | | |
2133 | 6 | set_error_and_return_default("hexadecimal floating-point values are not supported " |
2134 | 6 | "in TOML 1.0.0 and earlier"sv); |
2135 | | |
2136 | 6 | #endif // !TOML_LANG_UNRELEASED |
2137 | 6 | } |
2138 | | |
2139 | | template <uint64_t base> |
2140 | | TOML_NODISCARD |
2141 | | TOML_NEVER_INLINE |
2142 | | int64_t parse_integer() |
2143 | 35.8k | { |
2144 | 35.8k | return_if_error({}); |
2145 | 35.8k | assert_not_eof(); |
2146 | 35.8k | using traits = parse_integer_traits<base>; |
2147 | 35.8k | push_parse_scope(traits::scope_qualifier); |
2148 | | |
2149 | 35.8k | [[maybe_unused]] int64_t sign = 1; |
2150 | | if constexpr (traits::is_signed) |
2151 | 31.4k | { |
2152 | 31.4k | sign = *cp == U'-' ? -1 : 1; |
2153 | 31.4k | if (is_match(*cp, U'+', U'-')) |
2154 | 10.2k | advance_and_return_if_error_or_eof({}); |
2155 | 31.4k | } |
2156 | | |
2157 | | if constexpr (base == 10) |
2158 | 31.4k | { |
2159 | 31.4k | if (!traits::is_digit(*cp)) |
2160 | 31.4k | set_error_and_return_default("expected expected digit or sign, saw '"sv, to_sv(*cp), "'"sv); |
2161 | | } |
2162 | | else |
2163 | 4.45k | { |
2164 | | // '0' |
2165 | 4.45k | if (*cp != U'0') |
2166 | 4.45k | set_error_and_return_default("expected '0', saw '"sv, to_sv(*cp), "'"sv); |
2167 | 4.45k | advance_and_return_if_error_or_eof({}); |
2168 | | |
2169 | | // 'b', 'o', 'x' |
2170 | 4.45k | if (*cp != traits::prefix_codepoint) |
2171 | 4.45k | set_error_and_return_default("expected '"sv, traits::prefix, "', saw '"sv, to_sv(*cp), "'"sv); |
2172 | 4.44k | advance_and_return_if_error_or_eof({}); |
2173 | | |
2174 | 4.43k | if (!traits::is_digit(*cp)) |
2175 | 4.43k | set_error_and_return_default("expected digit, saw '"sv, to_sv(*cp), "'"sv); |
2176 | 4.43k | } |
2177 | | |
2178 | | // consume digits |
2179 | 35.7k | char digits[utf8_buffered_reader::max_history_length]; |
2180 | 35.8k | size_t length = {}; |
2181 | 35.8k | const utf8_codepoint* prev = {}; |
2182 | 355k | while (!is_eof() && !is_value_terminator(*cp)) |
2183 | 319k | { |
2184 | 319k | if (*cp == U'_') |
2185 | 32.6k | { |
2186 | 32.6k | if (!prev || !traits::is_digit(*prev)) |
2187 | 32.6k | set_error_and_return_default("underscores may only follow digits"sv); |
2188 | | |
2189 | 32.6k | prev = cp; |
2190 | 32.6k | advance_and_return_if_error_or_eof({}); |
2191 | 32.6k | continue; |
2192 | 32.6k | } |
2193 | 286k | else if TOML_UNLIKELY(prev && *prev == U'_' && !traits::is_digit(*cp)) |
2194 | 286k | set_error_and_return_default("underscores must be followed by digits"sv); |
2195 | 286k | else if TOML_UNLIKELY(!traits::is_digit(*cp)) |
2196 | 286k | set_error_and_return_default("expected digit, saw '"sv, to_sv(*cp), "'"sv); |
2197 | 286k | else if TOML_UNLIKELY(length == sizeof(digits)) |
2198 | 286k | set_error_and_return_default("exceeds length limit of "sv, sizeof(digits), " digits"sv); |
2199 | 286k | else |
2200 | 286k | digits[length++] = static_cast<char>(cp->bytes[0]); |
2201 | | |
2202 | 286k | prev = cp; |
2203 | 286k | advance_and_return_if_error({}); |
2204 | 286k | } |
2205 | | |
2206 | | // sanity check ending state |
2207 | 35.6k | if (prev && *prev == U'_') |
2208 | 4 | { |
2209 | 4 | set_error_and_return_if_eof({}); |
2210 | 4 | set_error_and_return_default("underscores must be followed by digits"sv); |
2211 | 4 | } |
2212 | | |
2213 | | // single digits can be converted trivially |
2214 | 35.6k | if (length == 1u) |
2215 | 1.69k | { |
2216 | 1.69k | int64_t result; |
2217 | | |
2218 | | if constexpr (base == 16) |
2219 | 1.06k | result = static_cast<int64_t>(hex_to_dec(digits[0])); |
2220 | | else |
2221 | 636 | result = static_cast<int64_t>(digits[0] - '0'); |
2222 | | |
2223 | | if constexpr (traits::is_signed) |
2224 | 0 | result *= sign; |
2225 | | |
2226 | 1.69k | return result; |
2227 | 1.69k | } |
2228 | | |
2229 | | // bin, oct and hex allow leading zeroes so trim them first |
2230 | 33.9k | const char* end = digits + length; |
2231 | 33.9k | const char* msd = digits; |
2232 | | if constexpr (base != 10) |
2233 | 2.63k | { |
2234 | 20.9k | while (msd < end && *msd == '0') |
2235 | 18.2k | msd++; |
2236 | 2.63k | if (msd == end) |
2237 | 1.02k | return 0ll; |
2238 | | } |
2239 | | |
2240 | | // decimal integers do not allow leading zeroes |
2241 | | else |
2242 | 31.3k | { |
2243 | 31.3k | if TOML_UNLIKELY(digits[0] == '0') |
2244 | 31.3k | set_error_and_return_default("leading zeroes are prohibited"sv); |
2245 | 31.3k | } |
2246 | | |
2247 | | // range check |
2248 | 33.9k | if TOML_UNLIKELY(static_cast<size_t>(end - msd) > traits::max_digits) |
2249 | 33.9k | set_error_and_return_default("'"sv, |
2250 | 33.9k | traits::full_prefix, |
2251 | 33.9k | std::string_view{ digits, length }, |
2252 | 33.9k | "' is not representable as a signed 64-bit integer"sv); |
2253 | | |
2254 | | // do the thing |
2255 | 33.9k | { |
2256 | 33.9k | uint64_t result = {}; |
2257 | 33.9k | { |
2258 | 33.9k | uint64_t power = 1; |
2259 | 297k | while (--end >= msd) |
2260 | 263k | { |
2261 | | if constexpr (base == 16) |
2262 | 4.10k | result += power * hex_to_dec(*end); |
2263 | | else |
2264 | 259k | result += power * static_cast<uint64_t>(*end - '0'); |
2265 | | |
2266 | 263k | power *= base; |
2267 | 263k | } |
2268 | 33.9k | } |
2269 | | |
2270 | | // range check |
2271 | 33.9k | static constexpr auto i64_max = static_cast<uint64_t>((std::numeric_limits<int64_t>::max)()); |
2272 | 33.9k | if TOML_UNLIKELY(result > i64_max + (sign < 0 ? 1u : 0u)) |
2273 | 33.9k | set_error_and_return_default("'"sv, |
2274 | 33.9k | traits::full_prefix, |
2275 | 33.9k | std::string_view{ digits, length }, |
2276 | 33.9k | "' is not representable as a signed 64-bit integer"sv); |
2277 | | |
2278 | | if constexpr (traits::is_signed) |
2279 | 31.2k | { |
2280 | | // avoid signed multiply UB when parsing INT64_MIN |
2281 | 31.2k | if TOML_UNLIKELY(sign < 0 && result == i64_max + 1u) |
2282 | 163 | return (std::numeric_limits<int64_t>::min)(); |
2283 | | |
2284 | 31.1k | return static_cast<int64_t>(result) * sign; |
2285 | | } |
2286 | | else |
2287 | 2.61k | return static_cast<int64_t>(result); |
2288 | 33.9k | } |
2289 | 33.9k | } long toml::v3::impl::impl_ex::parser::parse_integer<16ul>() Line | Count | Source | 2143 | 2.04k | { | 2144 | 2.04k | return_if_error({}); | 2145 | 2.04k | assert_not_eof(); | 2146 | 2.04k | using traits = parse_integer_traits<base>; | 2147 | 2.04k | push_parse_scope(traits::scope_qualifier); | 2148 | | | 2149 | 2.04k | [[maybe_unused]] int64_t sign = 1; | 2150 | | if constexpr (traits::is_signed) | 2151 | | { | 2152 | | sign = *cp == U'-' ? -1 : 1; | 2153 | | if (is_match(*cp, U'+', U'-')) | 2154 | | advance_and_return_if_error_or_eof({}); | 2155 | | } | 2156 | | | 2157 | | if constexpr (base == 10) | 2158 | | { | 2159 | | if (!traits::is_digit(*cp)) | 2160 | | set_error_and_return_default("expected expected digit or sign, saw '"sv, to_sv(*cp), "'"sv); | 2161 | | } | 2162 | | else | 2163 | 2.04k | { | 2164 | | // '0' | 2165 | 2.04k | if (*cp != U'0') | 2166 | 2.04k | set_error_and_return_default("expected '0', saw '"sv, to_sv(*cp), "'"sv); | 2167 | 2.04k | advance_and_return_if_error_or_eof({}); | 2168 | | | 2169 | | // 'b', 'o', 'x' | 2170 | 2.04k | if (*cp != traits::prefix_codepoint) | 2171 | 2.04k | set_error_and_return_default("expected '"sv, traits::prefix, "', saw '"sv, to_sv(*cp), "'"sv); | 2172 | 2.04k | advance_and_return_if_error_or_eof({}); | 2173 | | | 2174 | 2.03k | if (!traits::is_digit(*cp)) | 2175 | 2.03k | set_error_and_return_default("expected digit, saw '"sv, to_sv(*cp), "'"sv); | 2176 | 2.03k | } | 2177 | | | 2178 | | // consume digits | 2179 | 2.02k | char digits[utf8_buffered_reader::max_history_length]; | 2180 | 2.04k | size_t length = {}; | 2181 | 2.04k | const utf8_codepoint* prev = {}; | 2182 | 32.5k | while (!is_eof() && !is_value_terminator(*cp)) | 2183 | 30.5k | { | 2184 | 30.5k | if (*cp == U'_') | 2185 | 12.1k | { | 2186 | 12.1k | if (!prev || !traits::is_digit(*prev)) | 2187 | 12.1k | set_error_and_return_default("underscores may only follow digits"sv); | 2188 | | | 2189 | 12.1k | prev = cp; | 2190 | 12.1k | 
advance_and_return_if_error_or_eof({}); | 2191 | 12.1k | continue; | 2192 | 12.1k | } | 2193 | 18.3k | else if TOML_UNLIKELY(prev && *prev == U'_' && !traits::is_digit(*cp)) | 2194 | 18.3k | set_error_and_return_default("underscores must be followed by digits"sv); | 2195 | 18.3k | else if TOML_UNLIKELY(!traits::is_digit(*cp)) | 2196 | 18.3k | set_error_and_return_default("expected digit, saw '"sv, to_sv(*cp), "'"sv); | 2197 | 18.3k | else if TOML_UNLIKELY(length == sizeof(digits)) | 2198 | 18.3k | set_error_and_return_default("exceeds length limit of "sv, sizeof(digits), " digits"sv); | 2199 | 18.3k | else | 2200 | 18.3k | digits[length++] = static_cast<char>(cp->bytes[0]); | 2201 | | | 2202 | 18.3k | prev = cp; | 2203 | 18.3k | advance_and_return_if_error({}); | 2204 | 18.3k | } | 2205 | | | 2206 | | // sanity check ending state | 2207 | 2.00k | if (prev && *prev == U'_') | 2208 | 1 | { | 2209 | 1 | set_error_and_return_if_eof({}); | 2210 | 1 | set_error_and_return_default("underscores must be followed by digits"sv); | 2211 | 1 | } | 2212 | | | 2213 | | // single digits can be converted trivially | 2214 | 2.00k | if (length == 1u) | 2215 | 1.06k | { | 2216 | 1.06k | int64_t result; | 2217 | | | 2218 | | if constexpr (base == 16) | 2219 | 1.06k | result = static_cast<int64_t>(hex_to_dec(digits[0])); | 2220 | | else | 2221 | | result = static_cast<int64_t>(digits[0] - '0'); | 2222 | | | 2223 | | if constexpr (traits::is_signed) | 2224 | | result *= sign; | 2225 | | | 2226 | 1.06k | return result; | 2227 | 1.06k | } | 2228 | | | 2229 | | // bin, oct and hex allow leading zeroes so trim them first | 2230 | 946 | const char* end = digits + length; | 2231 | 946 | const char* msd = digits; | 2232 | | if constexpr (base != 10) | 2233 | 946 | { | 2234 | 13.2k | while (msd < end && *msd == '0') | 2235 | 12.3k | msd++; | 2236 | 946 | if (msd == end) | 2237 | 239 | return 0ll; | 2238 | | } | 2239 | | | 2240 | | // decimal integers do not allow leading zeroes | 2241 | | else | 
2242 | | { | 2243 | | if TOML_UNLIKELY(digits[0] == '0') | 2244 | | set_error_and_return_default("leading zeroes are prohibited"sv); | 2245 | | } | 2246 | | | 2247 | | // range check | 2248 | 946 | if TOML_UNLIKELY(static_cast<size_t>(end - msd) > traits::max_digits) | 2249 | 946 | set_error_and_return_default("'"sv, | 2250 | 942 | traits::full_prefix, | 2251 | 942 | std::string_view{ digits, length }, | 2252 | 942 | "' is not representable as a signed 64-bit integer"sv); | 2253 | | | 2254 | | // do the thing | 2255 | 942 | { | 2256 | 942 | uint64_t result = {}; | 2257 | 942 | { | 2258 | 942 | uint64_t power = 1; | 2259 | 5.04k | while (--end >= msd) | 2260 | 4.10k | { | 2261 | | if constexpr (base == 16) | 2262 | 4.10k | result += power * hex_to_dec(*end); | 2263 | | else | 2264 | | result += power * static_cast<uint64_t>(*end - '0'); | 2265 | | | 2266 | 4.10k | power *= base; | 2267 | 4.10k | } | 2268 | 942 | } | 2269 | | | 2270 | | // range check | 2271 | 942 | static constexpr auto i64_max = static_cast<uint64_t>((std::numeric_limits<int64_t>::max)()); | 2272 | 942 | if TOML_UNLIKELY(result > i64_max + (sign < 0 ? 1u : 0u)) | 2273 | 942 | set_error_and_return_default("'"sv, | 2274 | 926 | traits::full_prefix, | 2275 | 926 | std::string_view{ digits, length }, | 2276 | 926 | "' is not representable as a signed 64-bit integer"sv); | 2277 | | | 2278 | | if constexpr (traits::is_signed) | 2279 | | { | 2280 | | // avoid signed multiply UB when parsing INT64_MIN | 2281 | | if TOML_UNLIKELY(sign < 0 && result == i64_max + 1u) | 2282 | | return (std::numeric_limits<int64_t>::min)(); | 2283 | | | 2284 | | return static_cast<int64_t>(result) * sign; | 2285 | | } | 2286 | | else | 2287 | 926 | return static_cast<int64_t>(result); | 2288 | 926 | } | 2289 | 926 | } |
long toml::v3::impl::impl_ex::parser::parse_integer<8ul>() Line | Count | Source | 2143 | 1.15k | { | 2144 | 1.15k | return_if_error({}); | 2145 | 1.15k | assert_not_eof(); | 2146 | 1.15k | using traits = parse_integer_traits<base>; | 2147 | 1.15k | push_parse_scope(traits::scope_qualifier); | 2148 | | | 2149 | 1.15k | [[maybe_unused]] int64_t sign = 1; | 2150 | | if constexpr (traits::is_signed) | 2151 | | { | 2152 | | sign = *cp == U'-' ? -1 : 1; | 2153 | | if (is_match(*cp, U'+', U'-')) | 2154 | | advance_and_return_if_error_or_eof({}); | 2155 | | } | 2156 | | | 2157 | | if constexpr (base == 10) | 2158 | | { | 2159 | | if (!traits::is_digit(*cp)) | 2160 | | set_error_and_return_default("expected expected digit or sign, saw '"sv, to_sv(*cp), "'"sv); | 2161 | | } | 2162 | | else | 2163 | 1.15k | { | 2164 | | // '0' | 2165 | 1.15k | if (*cp != U'0') | 2166 | 1.15k | set_error_and_return_default("expected '0', saw '"sv, to_sv(*cp), "'"sv); | 2167 | 1.15k | advance_and_return_if_error_or_eof({}); | 2168 | | | 2169 | | // 'b', 'o', 'x' | 2170 | 1.15k | if (*cp != traits::prefix_codepoint) | 2171 | 1.15k | set_error_and_return_default("expected '"sv, traits::prefix, "', saw '"sv, to_sv(*cp), "'"sv); | 2172 | 1.15k | advance_and_return_if_error_or_eof({}); | 2173 | | | 2174 | 1.15k | if (!traits::is_digit(*cp)) | 2175 | 1.15k | set_error_and_return_default("expected digit, saw '"sv, to_sv(*cp), "'"sv); | 2176 | 1.15k | } | 2177 | | | 2178 | | // consume digits | 2179 | 1.12k | char digits[utf8_buffered_reader::max_history_length]; | 2180 | 1.15k | size_t length = {}; | 2181 | 1.15k | const utf8_codepoint* prev = {}; | 2182 | 7.26k | while (!is_eof() && !is_value_terminator(*cp)) | 2183 | 6.15k | { | 2184 | 6.15k | if (*cp == U'_') | 2185 | 858 | { | 2186 | 858 | if (!prev || !traits::is_digit(*prev)) | 2187 | 858 | set_error_and_return_default("underscores may only follow digits"sv); | 2188 | | | 2189 | 853 | prev = cp; | 2190 | 853 | 
advance_and_return_if_error_or_eof({}); | 2191 | 852 | continue; | 2192 | 853 | } | 2193 | 5.29k | else if TOML_UNLIKELY(prev && *prev == U'_' && !traits::is_digit(*cp)) | 2194 | 5.29k | set_error_and_return_default("underscores must be followed by digits"sv); | 2195 | 5.29k | else if TOML_UNLIKELY(!traits::is_digit(*cp)) | 2196 | 5.29k | set_error_and_return_default("expected digit, saw '"sv, to_sv(*cp), "'"sv); | 2197 | 5.26k | else if TOML_UNLIKELY(length == sizeof(digits)) | 2198 | 5.26k | set_error_and_return_default("exceeds length limit of "sv, sizeof(digits), " digits"sv); | 2199 | 5.26k | else | 2200 | 5.26k | digits[length++] = static_cast<char>(cp->bytes[0]); | 2201 | | | 2202 | 5.26k | prev = cp; | 2203 | 5.26k | advance_and_return_if_error({}); | 2204 | 5.26k | } | 2205 | | | 2206 | | // sanity check ending state | 2207 | 1.11k | if (prev && *prev == U'_') | 2208 | 1 | { | 2209 | 1 | set_error_and_return_if_eof({}); | 2210 | 1 | set_error_and_return_default("underscores must be followed by digits"sv); | 2211 | 1 | } | 2212 | | | 2213 | | // single digits can be converted trivially | 2214 | 1.11k | if (length == 1u) | 2215 | 209 | { | 2216 | 209 | int64_t result; | 2217 | | | 2218 | | if constexpr (base == 16) | 2219 | | result = static_cast<int64_t>(hex_to_dec(digits[0])); | 2220 | | else | 2221 | 209 | result = static_cast<int64_t>(digits[0] - '0'); | 2222 | | | 2223 | | if constexpr (traits::is_signed) | 2224 | | result *= sign; | 2225 | | | 2226 | 209 | return result; | 2227 | 209 | } | 2228 | | | 2229 | | // bin, oct and hex allow leading zeroes so trim them first | 2230 | 905 | const char* end = digits + length; | 2231 | 905 | const char* msd = digits; | 2232 | | if constexpr (base != 10) | 2233 | 905 | { | 2234 | 3.57k | while (msd < end && *msd == '0') | 2235 | 2.67k | msd++; | 2236 | 905 | if (msd == end) | 2237 | 463 | return 0ll; | 2238 | | } | 2239 | | | 2240 | | // decimal integers do not allow leading zeroes | 2241 | | else | 2242 | | { | 
2243 | | if TOML_UNLIKELY(digits[0] == '0') | 2244 | | set_error_and_return_default("leading zeroes are prohibited"sv); | 2245 | | } | 2246 | | | 2247 | | // range check | 2248 | 905 | if TOML_UNLIKELY(static_cast<size_t>(end - msd) > traits::max_digits) | 2249 | 905 | set_error_and_return_default("'"sv, | 2250 | 904 | traits::full_prefix, | 2251 | 904 | std::string_view{ digits, length }, | 2252 | 904 | "' is not representable as a signed 64-bit integer"sv); | 2253 | | | 2254 | | // do the thing | 2255 | 904 | { | 2256 | 904 | uint64_t result = {}; | 2257 | 904 | { | 2258 | 904 | uint64_t power = 1; | 2259 | 2.73k | while (--end >= msd) | 2260 | 1.83k | { | 2261 | | if constexpr (base == 16) | 2262 | | result += power * hex_to_dec(*end); | 2263 | | else | 2264 | 1.83k | result += power * static_cast<uint64_t>(*end - '0'); | 2265 | | | 2266 | 1.83k | power *= base; | 2267 | 1.83k | } | 2268 | 904 | } | 2269 | | | 2270 | | // range check | 2271 | 904 | static constexpr auto i64_max = static_cast<uint64_t>((std::numeric_limits<int64_t>::max)()); | 2272 | 904 | if TOML_UNLIKELY(result > i64_max + (sign < 0 ? 1u : 0u)) | 2273 | 904 | set_error_and_return_default("'"sv, | 2274 | 904 | traits::full_prefix, | 2275 | 904 | std::string_view{ digits, length }, | 2276 | 904 | "' is not representable as a signed 64-bit integer"sv); | 2277 | | | 2278 | | if constexpr (traits::is_signed) | 2279 | | { | 2280 | | // avoid signed multiply UB when parsing INT64_MIN | 2281 | | if TOML_UNLIKELY(sign < 0 && result == i64_max + 1u) | 2282 | | return (std::numeric_limits<int64_t>::min)(); | 2283 | | | 2284 | | return static_cast<int64_t>(result) * sign; | 2285 | | } | 2286 | | else | 2287 | 904 | return static_cast<int64_t>(result); | 2288 | 904 | } | 2289 | 904 | } |
long toml::v3::impl::impl_ex::parser::parse_integer<2ul>() Line | Count | Source | 2143 | 1.25k | { | 2144 | 1.25k | return_if_error({}); | 2145 | 1.25k | assert_not_eof(); | 2146 | 1.25k | using traits = parse_integer_traits<base>; | 2147 | 1.25k | push_parse_scope(traits::scope_qualifier); | 2148 | | | 2149 | 1.25k | [[maybe_unused]] int64_t sign = 1; | 2150 | | if constexpr (traits::is_signed) | 2151 | | { | 2152 | | sign = *cp == U'-' ? -1 : 1; | 2153 | | if (is_match(*cp, U'+', U'-')) | 2154 | | advance_and_return_if_error_or_eof({}); | 2155 | | } | 2156 | | | 2157 | | if constexpr (base == 10) | 2158 | | { | 2159 | | if (!traits::is_digit(*cp)) | 2160 | | set_error_and_return_default("expected expected digit or sign, saw '"sv, to_sv(*cp), "'"sv); | 2161 | | } | 2162 | | else | 2163 | 1.25k | { | 2164 | | // '0' | 2165 | 1.25k | if (*cp != U'0') | 2166 | 1.25k | set_error_and_return_default("expected '0', saw '"sv, to_sv(*cp), "'"sv); | 2167 | 1.25k | advance_and_return_if_error_or_eof({}); | 2168 | | | 2169 | | // 'b', 'o', 'x' | 2170 | 1.25k | if (*cp != traits::prefix_codepoint) | 2171 | 1.25k | set_error_and_return_default("expected '"sv, traits::prefix, "', saw '"sv, to_sv(*cp), "'"sv); | 2172 | 1.25k | advance_and_return_if_error_or_eof({}); | 2173 | | | 2174 | 1.24k | if (!traits::is_digit(*cp)) | 2175 | 1.24k | set_error_and_return_default("expected digit, saw '"sv, to_sv(*cp), "'"sv); | 2176 | 1.24k | } | 2177 | | | 2178 | | // consume digits | 2179 | 1.23k | char digits[utf8_buffered_reader::max_history_length]; | 2180 | 1.25k | size_t length = {}; | 2181 | 1.25k | const utf8_codepoint* prev = {}; | 2182 | 10.7k | while (!is_eof() && !is_value_terminator(*cp)) | 2183 | 9.54k | { | 2184 | 9.54k | if (*cp == U'_') | 2185 | 1.19k | { | 2186 | 1.19k | if (!prev || !traits::is_digit(*prev)) | 2187 | 1.19k | set_error_and_return_default("underscores may only follow digits"sv); | 2188 | | | 2189 | 1.19k | prev = cp; | 2190 | 1.19k | 
advance_and_return_if_error_or_eof({}); | 2191 | 1.18k | continue; | 2192 | 1.19k | } | 2193 | 8.34k | else if TOML_UNLIKELY(prev && *prev == U'_' && !traits::is_digit(*cp)) | 2194 | 8.34k | set_error_and_return_default("underscores must be followed by digits"sv); | 2195 | 8.33k | else if TOML_UNLIKELY(!traits::is_digit(*cp)) | 2196 | 8.33k | set_error_and_return_default("expected digit, saw '"sv, to_sv(*cp), "'"sv); | 2197 | 8.31k | else if TOML_UNLIKELY(length == sizeof(digits)) | 2198 | 8.31k | set_error_and_return_default("exceeds length limit of "sv, sizeof(digits), " digits"sv); | 2199 | 8.31k | else | 2200 | 8.31k | digits[length++] = static_cast<char>(cp->bytes[0]); | 2201 | | | 2202 | 8.31k | prev = cp; | 2203 | 8.31k | advance_and_return_if_error({}); | 2204 | 8.31k | } | 2205 | | | 2206 | | // sanity check ending state | 2207 | 1.21k | if (prev && *prev == U'_') | 2208 | 1 | { | 2209 | 1 | set_error_and_return_if_eof({}); | 2210 | 1 | set_error_and_return_default("underscores must be followed by digits"sv); | 2211 | 1 | } | 2212 | | | 2213 | | // single digits can be converted trivially | 2214 | 1.21k | if (length == 1u) | 2215 | 427 | { | 2216 | 427 | int64_t result; | 2217 | | | 2218 | | if constexpr (base == 16) | 2219 | | result = static_cast<int64_t>(hex_to_dec(digits[0])); | 2220 | | else | 2221 | 427 | result = static_cast<int64_t>(digits[0] - '0'); | 2222 | | | 2223 | | if constexpr (traits::is_signed) | 2224 | | result *= sign; | 2225 | | | 2226 | 427 | return result; | 2227 | 427 | } | 2228 | | | 2229 | | // bin, oct and hex allow leading zeroes so trim them first | 2230 | 783 | const char* end = digits + length; | 2231 | 783 | const char* msd = digits; | 2232 | | if constexpr (base != 10) | 2233 | 783 | { | 2234 | 4.07k | while (msd < end && *msd == '0') | 2235 | 3.28k | msd++; | 2236 | 783 | if (msd == end) | 2237 | 326 | return 0ll; | 2238 | | } | 2239 | | | 2240 | | // decimal integers do not allow leading zeroes | 2241 | | else | 2242 | | 
{ | 2243 | | if TOML_UNLIKELY(digits[0] == '0') | 2244 | | set_error_and_return_default("leading zeroes are prohibited"sv); | 2245 | | } | 2246 | | | 2247 | | // range check | 2248 | 783 | if TOML_UNLIKELY(static_cast<size_t>(end - msd) > traits::max_digits) | 2249 | 783 | set_error_and_return_default("'"sv, | 2250 | 781 | traits::full_prefix, | 2251 | 781 | std::string_view{ digits, length }, | 2252 | 781 | "' is not representable as a signed 64-bit integer"sv); | 2253 | | | 2254 | | // do the thing | 2255 | 781 | { | 2256 | 781 | uint64_t result = {}; | 2257 | 781 | { | 2258 | 781 | uint64_t power = 1; | 2259 | 4.42k | while (--end >= msd) | 2260 | 3.64k | { | 2261 | | if constexpr (base == 16) | 2262 | | result += power * hex_to_dec(*end); | 2263 | | else | 2264 | 3.64k | result += power * static_cast<uint64_t>(*end - '0'); | 2265 | | | 2266 | 3.64k | power *= base; | 2267 | 3.64k | } | 2268 | 781 | } | 2269 | | | 2270 | | // range check | 2271 | 781 | static constexpr auto i64_max = static_cast<uint64_t>((std::numeric_limits<int64_t>::max)()); | 2272 | 781 | if TOML_UNLIKELY(result > i64_max + (sign < 0 ? 1u : 0u)) | 2273 | 781 | set_error_and_return_default("'"sv, | 2274 | 781 | traits::full_prefix, | 2275 | 781 | std::string_view{ digits, length }, | 2276 | 781 | "' is not representable as a signed 64-bit integer"sv); | 2277 | | | 2278 | | if constexpr (traits::is_signed) | 2279 | | { | 2280 | | // avoid signed multiply UB when parsing INT64_MIN | 2281 | | if TOML_UNLIKELY(sign < 0 && result == i64_max + 1u) | 2282 | | return (std::numeric_limits<int64_t>::min)(); | 2283 | | | 2284 | | return static_cast<int64_t>(result) * sign; | 2285 | | } | 2286 | | else | 2287 | 781 | return static_cast<int64_t>(result); | 2288 | 781 | } | 2289 | 781 | } |
long toml::v3::impl::impl_ex::parser::parse_integer<10ul>() Line | Count | Source | 2143 | 31.4k | { | 2144 | 31.4k | return_if_error({}); | 2145 | 31.4k | assert_not_eof(); | 2146 | 31.4k | using traits = parse_integer_traits<base>; | 2147 | 31.4k | push_parse_scope(traits::scope_qualifier); | 2148 | | | 2149 | 31.4k | [[maybe_unused]] int64_t sign = 1; | 2150 | | if constexpr (traits::is_signed) | 2151 | 31.4k | { | 2152 | 31.4k | sign = *cp == U'-' ? -1 : 1; | 2153 | 31.4k | if (is_match(*cp, U'+', U'-')) | 2154 | 10.2k | advance_and_return_if_error_or_eof({}); | 2155 | 31.4k | } | 2156 | | | 2157 | | if constexpr (base == 10) | 2158 | 31.4k | { | 2159 | 31.4k | if (!traits::is_digit(*cp)) | 2160 | 31.4k | set_error_and_return_default("expected expected digit or sign, saw '"sv, to_sv(*cp), "'"sv); | 2161 | | } | 2162 | | else | 2163 | | { | 2164 | | // '0' | 2165 | | if (*cp != U'0') | 2166 | | set_error_and_return_default("expected '0', saw '"sv, to_sv(*cp), "'"sv); | 2167 | | advance_and_return_if_error_or_eof({}); | 2168 | | | 2169 | | // 'b', 'o', 'x' | 2170 | | if (*cp != traits::prefix_codepoint) | 2171 | | set_error_and_return_default("expected '"sv, traits::prefix, "', saw '"sv, to_sv(*cp), "'"sv); | 2172 | | advance_and_return_if_error_or_eof({}); | 2173 | | | 2174 | | if (!traits::is_digit(*cp)) | 2175 | | set_error_and_return_default("expected digit, saw '"sv, to_sv(*cp), "'"sv); | 2176 | | } | 2177 | | | 2178 | | // consume digits | 2179 | 31.3k | char digits[utf8_buffered_reader::max_history_length]; | 2180 | 31.4k | size_t length = {}; | 2181 | 31.4k | const utf8_codepoint* prev = {}; | 2182 | 304k | while (!is_eof() && !is_value_terminator(*cp)) | 2183 | 273k | { | 2184 | 273k | if (*cp == U'_') | 2185 | 18.4k | { | 2186 | 18.4k | if (!prev || !traits::is_digit(*prev)) | 2187 | 18.4k | set_error_and_return_default("underscores may only follow digits"sv); | 2188 | | | 2189 | 18.4k | prev = cp; | 2190 | 18.4k | 
advance_and_return_if_error_or_eof({}); | 2191 | 18.4k | continue; | 2192 | 18.4k | } | 2193 | 254k | else if TOML_UNLIKELY(prev && *prev == U'_' && !traits::is_digit(*cp)) | 2194 | 254k | set_error_and_return_default("underscores must be followed by digits"sv); | 2195 | 254k | else if TOML_UNLIKELY(!traits::is_digit(*cp)) | 2196 | 254k | set_error_and_return_default("expected digit, saw '"sv, to_sv(*cp), "'"sv); | 2197 | 254k | else if TOML_UNLIKELY(length == sizeof(digits)) | 2198 | 254k | set_error_and_return_default("exceeds length limit of "sv, sizeof(digits), " digits"sv); | 2199 | 254k | else | 2200 | 254k | digits[length++] = static_cast<char>(cp->bytes[0]); | 2201 | | | 2202 | 254k | prev = cp; | 2203 | 254k | advance_and_return_if_error({}); | 2204 | 254k | } | 2205 | | | 2206 | | // sanity check ending state | 2207 | 31.3k | if (prev && *prev == U'_') | 2208 | 1 | { | 2209 | 1 | set_error_and_return_if_eof({}); | 2210 | 1 | set_error_and_return_default("underscores must be followed by digits"sv); | 2211 | 1 | } | 2212 | | | 2213 | | // single digits can be converted trivially | 2214 | 31.3k | if (length == 1u) | 2215 | 0 | { | 2216 | 0 | int64_t result; | 2217 | |
| 2218 | | if constexpr (base == 16) | 2219 | | result = static_cast<int64_t>(hex_to_dec(digits[0])); | 2220 | | else | 2221 | 0 | result = static_cast<int64_t>(digits[0] - '0'); | 2222 | |
| 2223 | | if constexpr (traits::is_signed) | 2224 | 0 | result *= sign; | 2225 | |
| 2226 | 0 | return result; | 2227 | 0 | } | 2228 | | | 2229 | | // bin, oct and hex allow leading zeroes so trim them first | 2230 | 31.3k | const char* end = digits + length; | 2231 | 31.3k | const char* msd = digits; | 2232 | | if constexpr (base != 10) | 2233 | | { | 2234 | | while (msd < end && *msd == '0') | 2235 | | msd++; | 2236 | | if (msd == end) | 2237 | | return 0ll; | 2238 | | } | 2239 | | | 2240 | | // decimal integers do not allow leading zeroes | 2241 | | else | 2242 | 31.3k | { | 2243 | 31.3k | if TOML_UNLIKELY(digits[0] == '0') | 2244 | 31.3k | set_error_and_return_default("leading zeroes are prohibited"sv); | 2245 | 31.3k | } | 2246 | | | 2247 | | // range check | 2248 | 31.3k | if TOML_UNLIKELY(static_cast<size_t>(end - msd) > traits::max_digits) | 2249 | 31.3k | set_error_and_return_default("'"sv, | 2250 | 31.3k | traits::full_prefix, | 2251 | 31.3k | std::string_view{ digits, length }, | 2252 | 31.3k | "' is not representable as a signed 64-bit integer"sv); | 2253 | | | 2254 | | // do the thing | 2255 | 31.3k | { | 2256 | 31.3k | uint64_t result = {}; | 2257 | 31.3k | { | 2258 | 31.3k | uint64_t power = 1; | 2259 | 285k | while (--end >= msd) | 2260 | 254k | { | 2261 | | if constexpr (base == 16) | 2262 | | result += power * hex_to_dec(*end); | 2263 | | else | 2264 | 254k | result += power * static_cast<uint64_t>(*end - '0'); | 2265 | | | 2266 | 254k | power *= base; | 2267 | 254k | } | 2268 | 31.3k | } | 2269 | | | 2270 | | // range check | 2271 | 31.3k | static constexpr auto i64_max = static_cast<uint64_t>((std::numeric_limits<int64_t>::max)()); | 2272 | 31.3k | if TOML_UNLIKELY(result > i64_max + (sign < 0 ? 
1u : 0u)) | 2273 | 31.3k | set_error_and_return_default("'"sv, | 2274 | 31.2k | traits::full_prefix, | 2275 | 31.2k | std::string_view{ digits, length }, | 2276 | 31.2k | "' is not representable as a signed 64-bit integer"sv); | 2277 | | | 2278 | | if constexpr (traits::is_signed) | 2279 | 31.2k | { | 2280 | | // avoid signed multiply UB when parsing INT64_MIN | 2281 | 31.2k | if TOML_UNLIKELY(sign < 0 && result == i64_max + 1u) | 2282 | 163 | return (std::numeric_limits<int64_t>::min)(); | 2283 | | | 2284 | 31.1k | return static_cast<int64_t>(result) * sign; | 2285 | | } | 2286 | | else | 2287 | | return static_cast<int64_t>(result); | 2288 | 31.2k | } | 2289 | 31.2k | } |
|
2290 | | |
2291 | | TOML_NODISCARD |
2292 | | TOML_NEVER_INLINE |
2293 | | date parse_date(bool part_of_datetime = false) |
2294 | 7.09k | { |
2295 | 7.09k | return_if_error({}); |
2296 | 7.09k | assert_not_eof(); |
2297 | 7.09k | TOML_ASSERT_ASSUME(is_decimal_digit(*cp)); |
2298 | 7.09k | push_parse_scope("date"sv); |
2299 | | |
2300 | | // "YYYY" |
2301 | 7.09k | uint32_t digits[4]; |
2302 | 7.09k | if (!consume_digit_sequence(digits, 4u)) |
2303 | 7.09k | set_error_and_return_default("expected 4-digit year, saw '"sv, to_sv(cp), "'"sv); |
2304 | 7.05k | const auto year = digits[3] + digits[2] * 10u + digits[1] * 100u + digits[0] * 1000u; |
2305 | 7.05k | const auto is_leap_year = (year % 4u == 0u) && ((year % 100u != 0u) || (year % 400u == 0u)); |
2306 | 7.05k | set_error_and_return_if_eof({}); |
2307 | | |
2308 | | // '-' |
2309 | 7.05k | if (*cp != U'-') |
2310 | 7.05k | set_error_and_return_default("expected '-', saw '"sv, to_sv(*cp), "'"sv); |
2311 | 7.03k | advance_and_return_if_error_or_eof({}); |
2312 | | |
2313 | | // "MM" |
2314 | 7.03k | if (!consume_digit_sequence(digits, 2u)) |
2315 | 7.03k | set_error_and_return_default("expected 2-digit month, saw '"sv, to_sv(cp), "'"sv); |
2316 | 7.00k | const auto month = digits[1] + digits[0] * 10u; |
2317 | 7.00k | if (month == 0u || month > 12u) |
2318 | 7.00k | set_error_and_return_default("expected month between 1 and 12 (inclusive), saw "sv, month); |
2319 | 6.99k | const auto max_days_in_month = month == 2u |
2320 | 6.99k | ? (is_leap_year ? 29u : 28u) |
2321 | 6.99k | : (month == 4u || month == 6u || month == 9u || month == 11u ? 30u : 31u); |
2322 | 6.99k | set_error_and_return_if_eof({}); |
2323 | | |
2324 | | // '-' |
2325 | 6.99k | if (*cp != U'-') |
2326 | 6.99k | set_error_and_return_default("expected '-', saw '"sv, to_sv(*cp), "'"sv); |
2327 | 6.97k | advance_and_return_if_error_or_eof({}); |
2328 | | |
2329 | | // "DD" |
2330 | 6.97k | if (!consume_digit_sequence(digits, 2u)) |
2331 | 6.97k | set_error_and_return_default("expected 2-digit day, saw '"sv, to_sv(cp), "'"sv); |
2332 | 6.96k | const auto day = digits[1] + digits[0] * 10u; |
2333 | 6.96k | if (day == 0u || day > max_days_in_month) |
2334 | 6.96k | set_error_and_return_default("expected day between 1 and "sv, |
2335 | 6.95k | max_days_in_month, |
2336 | 6.95k | " (inclusive), saw "sv, |
2337 | 6.95k | day); |
2338 | | |
2339 | 6.95k | if (!part_of_datetime && !is_eof() && !is_value_terminator(*cp)) |
2340 | 6.95k | set_error_and_return_default("expected value-terminator, saw '"sv, to_sv(*cp), "'"sv); |
2341 | | |
2342 | 6.94k | return { year, month, day }; |
2343 | 6.95k | } |
2344 | | |
2345 | | TOML_NODISCARD |
2346 | | TOML_NEVER_INLINE |
2347 | | time parse_time(bool part_of_datetime = false) |
2348 | 6.93k | { |
2349 | 6.93k | return_if_error({}); |
2350 | 6.93k | assert_not_eof(); |
2351 | 6.93k | TOML_ASSERT_ASSUME(is_decimal_digit(*cp)); |
2352 | 6.93k | push_parse_scope("time"sv); |
2353 | | |
2354 | 6.93k | static constexpr size_t max_digits = 64; // far more than necessary but needed to allow fractional |
2355 | | // millisecond truncation per the spec |
2356 | 6.93k | uint32_t digits[max_digits]; |
2357 | | |
2358 | | // "HH" |
2359 | 6.93k | if (!consume_digit_sequence(digits, 2u)) |
2360 | 6.93k | set_error_and_return_default("expected 2-digit hour, saw '"sv, to_sv(cp), "'"sv); |
2361 | 6.88k | const auto hour = digits[1] + digits[0] * 10u; |
2362 | 6.88k | if (hour > 23u) |
2363 | 6.88k | set_error_and_return_default("expected hour between 0 to 59 (inclusive), saw "sv, hour); |
2364 | 6.87k | set_error_and_return_if_eof({}); |
2365 | | |
2366 | | // ':' |
2367 | 6.87k | if (*cp != U':') |
2368 | 6.87k | set_error_and_return_default("expected ':', saw '"sv, to_sv(*cp), "'"sv); |
2369 | 6.86k | advance_and_return_if_error_or_eof({}); |
2370 | | |
2371 | | // "MM" |
2372 | 6.86k | if (!consume_digit_sequence(digits, 2u)) |
2373 | 6.86k | set_error_and_return_default("expected 2-digit minute, saw '"sv, to_sv(cp), "'"sv); |
2374 | 6.84k | const auto minute = digits[1] + digits[0] * 10u; |
2375 | 6.84k | if (minute > 59u) |
2376 | 6.84k | set_error_and_return_default("expected minute between 0 and 59 (inclusive), saw "sv, minute); |
2377 | 6.83k | auto time = toml::time{ hour, minute }; |
2378 | | |
2379 | | // ':' |
2380 | | if constexpr (TOML_LANG_UNRELEASED) // toml/issues/671 (allow omission of seconds) |
2381 | | { |
2382 | | if (is_eof() || is_value_terminator(*cp) || (part_of_datetime && is_match(*cp, U'+', U'-', U'Z', U'z'))) |
2383 | | return time; |
2384 | | } |
2385 | | else |
2386 | 6.83k | set_error_and_return_if_eof({}); |
2387 | 6.83k | if (*cp != U':') |
2388 | 6.83k | set_error_and_return_default("expected ':', saw '"sv, to_sv(*cp), "'"sv); |
2389 | 6.81k | advance_and_return_if_error_or_eof({}); |
2390 | | |
2391 | | // "SS" |
2392 | 6.81k | if (!consume_digit_sequence(digits, 2u)) |
2393 | 6.81k | set_error_and_return_default("expected 2-digit second, saw '"sv, to_sv(cp), "'"sv); |
2394 | 6.78k | const auto second = digits[1] + digits[0] * 10u; |
2395 | 6.78k | if (second > 59u) |
2396 | 6.78k | set_error_and_return_default("expected second between 0 and 59 (inclusive), saw "sv, second); |
2397 | 6.78k | time.second = static_cast<decltype(time.second)>(second); |
2398 | | |
2399 | | // '.' (early-exiting is allowed; fractional is optional) |
2400 | 6.78k | if (is_eof() || is_value_terminator(*cp) || (part_of_datetime && is_match(*cp, U'+', U'-', U'Z', U'z'))) |
2401 | 3.90k | return time; |
2402 | 2.87k | if (*cp != U'.') |
2403 | 2.87k | set_error_and_return_default("expected '.', saw '"sv, to_sv(*cp), "'"sv); |
2404 | 2.85k | advance_and_return_if_error_or_eof({}); |
2405 | | |
2406 | | // "FFFFFFFFF" |
2407 | 2.85k | size_t digit_count = consume_variable_length_digit_sequence(digits, max_digits); |
2408 | 2.85k | if (!digit_count) |
2409 | 17 | { |
2410 | 17 | set_error_and_return_if_eof({}); |
2411 | 17 | set_error_and_return_default("expected fractional digits, saw '"sv, to_sv(*cp), "'"sv); |
2412 | 17 | } |
2413 | 2.83k | else if (!is_eof()) |
2414 | 2.80k | { |
2415 | 2.80k | if (digit_count == max_digits && is_decimal_digit(*cp)) |
2416 | 2.80k | set_error_and_return_default("fractional component exceeds maximum precision of "sv, max_digits); |
2417 | 2.80k | else if (!part_of_datetime && !is_value_terminator(*cp)) |
2418 | 5 | set_error_and_return_default("expected value-terminator, saw '"sv, to_sv(*cp), "'"sv); |
2419 | 2.80k | } |
2420 | 2.82k | uint32_t value = 0u; |
2421 | 2.82k | uint32_t place = 1u; |
2422 | 14.7k | for (auto i = impl::min<size_t>(digit_count, 9u); i-- > 0u;) |
2423 | 11.9k | { |
2424 | 11.9k | value += digits[i] * place; |
2425 | 11.9k | place *= 10u; |
2426 | 11.9k | } |
2427 | 16.2k | for (auto i = digit_count; i < 9u; i++) // implicit zeros |
2428 | 13.3k | value *= 10u; |
2429 | 2.82k | time.nanosecond = value; |
2430 | 2.82k | return time; |
2431 | 2.85k | } |
2432 | | |
2433 | | TOML_NODISCARD |
2434 | | TOML_NEVER_INLINE |
2435 | | date_time parse_date_time() |
2436 | 4.56k | { |
2437 | 4.56k | return_if_error({}); |
2438 | 4.56k | assert_not_eof(); |
2439 | 4.56k | TOML_ASSERT_ASSUME(is_decimal_digit(*cp)); |
2440 | 4.56k | push_parse_scope("date-time"sv); |
2441 | | |
2442 | | // "YYYY-MM-DD" |
2443 | 4.56k | auto date = parse_date(true); |
2444 | 4.56k | set_error_and_return_if_eof({}); |
2445 | | |
2446 | | // ' ', 'T' or 't' |
2447 | 4.56k | if (!is_match(*cp, U' ', U'T', U't')) |
2448 | 4.56k | set_error_and_return_default("expected space, 'T' or 't', saw '"sv, to_sv(*cp), "'"sv); |
2449 | 4.55k | advance_and_return_if_error_or_eof({}); |
2450 | | |
2451 | | // "HH:MM:SS.FFFFFFFFF" |
2452 | 4.55k | auto time = parse_time(true); |
2453 | 4.55k | return_if_error({}); |
2454 | | |
2455 | | // no offset |
2456 | 4.55k | if (is_eof() || is_value_terminator(*cp)) |
2457 | 1.03k | return { date, time }; |
2458 | | |
2459 | | // zero offset ('Z' or 'z') |
2460 | 3.52k | time_offset offset{}; |
2461 | 3.52k | if (is_match(*cp, U'Z', U'z')) |
2462 | 1.29k | advance_and_return_if_error({}); |
2463 | | |
2464 | | // explicit offset ("+/-HH:MM") |
2465 | 2.22k | else if (is_match(*cp, U'+', U'-')) |
2466 | 2.13k | { |
2467 | 2.13k | push_parse_scope("date-time offset"sv); |
2468 | | |
2469 | | // sign |
2470 | 2.13k | int sign = *cp == U'-' ? -1 : 1; |
2471 | 2.13k | advance_and_return_if_error_or_eof({}); |
2472 | | |
2473 | | // "HH" |
2474 | 2.13k | int digits[2]; |
2475 | 2.13k | if (!consume_digit_sequence(digits, 2u)) |
2476 | 2.13k | set_error_and_return_default("expected 2-digit hour, saw '"sv, to_sv(cp), "'"sv); |
2477 | 2.11k | const auto hour = digits[1] + digits[0] * 10; |
2478 | 2.11k | if (hour > 23) |
2479 | 2.11k | set_error_and_return_default("expected hour between 0 and 23 (inclusive), saw "sv, hour); |
2480 | 2.11k | set_error_and_return_if_eof({}); |
2481 | | |
2482 | | // ':' |
2483 | 2.11k | if (*cp != U':') |
2484 | 2.11k | set_error_and_return_default("expected ':', saw '"sv, to_sv(*cp), "'"sv); |
2485 | 2.10k | advance_and_return_if_error_or_eof({}); |
2486 | | |
2487 | | // "MM" |
2488 | 2.10k | if (!consume_digit_sequence(digits, 2u)) |
2489 | 2.10k | set_error_and_return_default("expected 2-digit minute, saw '"sv, to_sv(cp), "'"sv); |
2490 | 2.08k | const auto minute = digits[1] + digits[0] * 10; |
2491 | 2.08k | if (minute > 59) |
2492 | 2.08k | set_error_and_return_default("expected minute between 0 and 59 (inclusive), saw "sv, minute); |
2493 | 2.08k | offset.minutes = static_cast<decltype(offset.minutes)>((hour * 60 + minute) * sign); |
2494 | 2.08k | } |
2495 | | |
2496 | 3.47k | if (!is_eof() && !is_value_terminator(*cp)) |
2497 | 3.47k | set_error_and_return_default("expected value-terminator, saw '"sv, to_sv(*cp), "'"sv); |
2498 | | |
2499 | 3.45k | return { date, time, offset }; |
2500 | 3.47k | } |
2501 | | |
2502 | | TOML_NODISCARD
2503 | | node_ptr parse_array(); // declaration only (no body at this point in the listing); parse_value_known_prefixes() calls it when the value starts with '['
2504 | | |
2505 | | TOML_NODISCARD
2506 | | node_ptr parse_inline_table(); // declaration only (no body at this point in the listing); parse_value_known_prefixes() calls it when the value starts with '{'
2507 | | |
2508 | | TOML_NODISCARD |
2509 | | node_ptr parse_value_known_prefixes() |
2510 | 876k | { |
2511 | 876k | return_if_error({}); |
2512 | 876k | assert_not_eof(); |
2513 | 876k | TOML_ASSERT_ASSUME(!is_control_character(*cp)); |
2514 | 876k | TOML_ASSERT_ASSUME(*cp != U'_'); |
2515 | | |
2516 | 876k | switch (cp->value) |
2517 | 876k | { |
2518 | | // arrays |
2519 | 4.93k | case U'[': return parse_array(); |
2520 | | |
2521 | | // inline tables |
2522 | 7.56k | case U'{': return parse_inline_table(); |
2523 | | |
2524 | | // floats beginning with '.' |
2525 | 9 | case U'.': return node_ptr{ new value{ parse_float() } }; |
2526 | | |
2527 | | // strings |
2528 | 2.74k | case U'"': [[fallthrough]]; |
2529 | 6.84k | case U'\'': return node_ptr{ new value{ parse_string().value } }; |
2530 | | |
2531 | 857k | default: |
2532 | 857k | { |
2533 | 857k | const auto cp_upper = static_cast<uint_least32_t>(cp->value) & ~0x20u; |
2534 | | |
2535 | | // bools |
2536 | 857k | if (cp_upper == 70u || cp_upper == 84u) // F or T |
2537 | 1.71k | return node_ptr{ new value{ parse_boolean() } }; |
2538 | | |
2539 | | // inf/nan |
2540 | 855k | else if (cp_upper == 73u || cp_upper == 78u) // I or N |
2541 | 1.23k | return node_ptr{ new value{ parse_inf_or_nan() } }; |
2542 | | |
2543 | 854k | else |
2544 | 854k | return nullptr; |
2545 | 857k | } |
2546 | 876k | } |
2547 | 876k | TOML_UNREACHABLE; |
2548 | 876k | } |
2549 | | |
2550 | | TOML_NODISCARD |
2551 | | node_ptr parse_value() |
2552 | 876k | { |
2553 | 876k | return_if_error({}); |
2554 | 876k | assert_not_eof(); |
2555 | 876k | TOML_ASSERT_ASSUME(!is_value_terminator(*cp)); |
2556 | 876k | push_parse_scope("value"sv); |
2557 | | |
2558 | 876k | const depth_counter_scope depth_counter{ nested_values }; |
2559 | 876k | if TOML_UNLIKELY(nested_values > max_nested_values) |
2560 | 876k | set_error_and_return_default("exceeded maximum nested value depth of "sv, |
2561 | 876k | max_nested_values, |
2562 | 876k | " (TOML_MAX_NESTED_VALUES)"sv); |
2563 | | |
2564 | | // check if it begins with some control character |
2565 | | // (note that this will also fail for whitespace but we're assuming we've |
2566 | | // called consume_leading_whitespace() before calling parse_value()) |
2567 | 876k | if TOML_UNLIKELY(is_control_character(*cp)) |
2568 | 876k | set_error_and_return_default("unexpected control character"sv); |
2569 | | |
2570 | | // underscores at the beginning |
2571 | 876k | else if (*cp == U'_') |
2572 | 0 | set_error_and_return_default("values may not begin with underscores"sv); |
2573 | | |
2574 | 876k | const auto begin_pos = cp->position; |
2575 | 876k | node_ptr val; |
2576 | | |
2577 | 876k | do |
2578 | 876k | { |
2579 | 876k | TOML_ASSERT_ASSUME(!is_control_character(*cp)); |
2580 | 876k | TOML_ASSERT_ASSUME(*cp != U'_'); |
2581 | | |
2582 | | // detect the value type and parse accordingly, |
2583 | | // starting with value types that can be detected |
2584 | | // unambiguously from just one character. |
2585 | | |
2586 | 876k | val = parse_value_known_prefixes(); |
2587 | 876k | return_if_error({}); |
2588 | 876k | if (val) |
2589 | 18.7k | break; |
2590 | | |
2591 | | // value types from here down require more than one character to unambiguously identify |
2592 | | // so scan ahead and collect a set of value 'traits'. |
2593 | 857k | enum TOML_CLOSED_FLAGS_ENUM value_traits : int |
2594 | 857k | { |
2595 | 857k | has_nothing = 0, |
2596 | 857k | has_digits = 1, |
2597 | 857k | has_b = 1 << 1, // as second char only (0b) |
2598 | 857k | has_e = 1 << 2, // only float exponents |
2599 | 857k | has_o = 1 << 3, // as second char only (0o) |
2600 | 857k | has_p = 1 << 4, // only hexfloat exponents |
2601 | 857k | has_t = 1 << 5, |
2602 | 857k | has_x = 1 << 6, // as second or third char only (0x, -0x, +0x) |
2603 | 857k | has_z = 1 << 7, |
2604 | 857k | has_colon = 1 << 8, |
2605 | 857k | has_plus = 1 << 9, |
2606 | 857k | has_minus = 1 << 10, |
2607 | 857k | has_dot = 1 << 11, |
2608 | 857k | begins_sign = 1 << 12, |
2609 | 857k | begins_digit = 1 << 13, |
2610 | 857k | begins_zero = 1 << 14, |
2611 | | |
2612 | 857k | signs_msk = has_plus | has_minus, |
2613 | 857k | bdigit_msk = has_digits | begins_digit, |
2614 | 857k | bzero_msk = bdigit_msk | begins_zero, |
2615 | 857k | }; |
2616 | 857k | value_traits traits = has_nothing; |
2617 | 1.07M | const auto has_any = [&](auto t) noexcept { return (traits & t) != has_nothing; };auto toml::v3::impl::impl_ex::parser::parse_value()::{lambda(auto:1)#1}::operator()<toml::v3::impl::impl_ex::parser::parse_value()::value_traits>(toml::v3::impl::impl_ex::parser::parse_value()::value_traits) constLine | Count | Source | 2617 | 1.01M | const auto has_any = [&](auto t) noexcept { return (traits & t) != has_nothing; }; |
auto toml::v3::impl::impl_ex::parser::parse_value()::{lambda(auto:1)#1}::operator()<int>(int) constLine | Count | Source | 2617 | 56.0k | const auto has_any = [&](auto t) noexcept { return (traits & t) != has_nothing; }; |
|
2618 | 857k | const auto has_none = [&](auto t) noexcept { return (traits & t) == has_nothing; }; |
2619 | 2.19M | const auto add_trait = [&](auto t) noexcept { traits = static_cast<value_traits>(traits | t); }; |
2620 | | |
2621 | | // examine the first character to get the 'begins with' traits |
2622 | | // (good fail-fast opportunity; all the remaining types begin with numeric digits or signs) |
2623 | 857k | if (is_decimal_digit(*cp)) |
2624 | 838k | { |
2625 | 838k | add_trait(begins_digit); |
2626 | 838k | if (*cp == U'0') |
2627 | 16.7k | add_trait(begins_zero); |
2628 | 838k | } |
2629 | 18.9k | else if (is_match(*cp, U'+', U'-')) |
2630 | 15.2k | add_trait(begins_sign); |
2631 | 3.60k | else |
2632 | 3.60k | break; |
2633 | | |
2634 | | // scan the rest of the value to determine the remaining traits |
2635 | 854k | char32_t chars[utf8_buffered_reader::max_history_length]; |
2636 | 854k | size_t char_count = {}, advance_count = {}; |
2637 | 854k | bool eof_while_scanning = false; |
2638 | 854k | const auto scan = [&]() noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) |
2639 | 856k | { |
2640 | 856k | if (is_eof()) |
2641 | 0 | return; |
2642 | 856k | TOML_ASSERT_ASSUME(!is_value_terminator(*cp)); |
2643 | | |
2644 | 856k | do |
2645 | 1.36M | { |
2646 | 1.36M | if (const auto c = **cp; c != U'_') |
2647 | 1.33M | { |
2648 | 1.33M | chars[char_count++] = c; |
2649 | | |
2650 | 1.33M | if (is_decimal_digit(c)) |
2651 | 1.25M | add_trait(has_digits); |
2652 | 72.7k | else if (is_ascii_letter(c)) |
2653 | 13.8k | { |
2654 | 13.8k | TOML_ASSERT_ASSUME((c >= U'a' && c <= U'z') || (c >= U'A' && c <= U'Z')); |
2655 | 13.8k | switch (static_cast<char32_t>(c | 32u)) |
2656 | 13.8k | { |
2657 | 1.74k | case U'b': |
2658 | 1.74k | if (char_count == 2u && has_any(begins_zero)) |
2659 | 1.25k | add_trait(has_b); |
2660 | 1.74k | break; |
2661 | | |
2662 | 1.78k | case U'e': |
2663 | 1.78k | if (char_count > 1u |
2664 | 1.78k | && has_none(has_b | has_o | has_p | has_t | has_x | has_z | has_colon) |
2665 | 1.52k | && (has_none(has_plus | has_minus) || has_any(begins_sign))) |
2666 | 1.37k | add_trait(has_e); |
2667 | 1.78k | break; |
2668 | | |
2669 | 1.26k | case U'o': |
2670 | 1.26k | if (char_count == 2u && has_any(begins_zero)) |
2671 | 1.15k | add_trait(has_o); |
2672 | 1.26k | break; |
2673 | | |
2674 | 97 | case U'p': |
2675 | 97 | if (has_any(has_x)) |
2676 | 31 | add_trait(has_p); |
2677 | 97 | break; |
2678 | | |
2679 | 2.29k | case U'x': |
2680 | 2.29k | if ((char_count == 2u && has_any(begins_zero)) |
2681 | 248 | || (char_count == 3u && has_any(begins_sign) && chars[1] == U'0')) |
2682 | 2.05k | add_trait(has_x); |
2683 | 2.29k | break; |
2684 | | |
2685 | 2.10k | case U't': add_trait(has_t); break; |
2686 | 1.45k | case U'z': add_trait(has_z); break; |
2687 | 13.8k | } |
2688 | 13.8k | } |
2689 | 58.8k | else if (c <= U':') |
2690 | 58.0k | { |
2691 | 58.0k | TOML_ASSERT_ASSUME(c < U'0' || c > U'9'); |
2692 | 58.0k | switch (c) |
2693 | 58.0k | { |
2694 | 4.80k | case U'+': add_trait(has_plus); break; |
2695 | 27.7k | case U'-': add_trait(has_minus); break; |
2696 | 9.09k | case U'.': add_trait(has_dot); break; |
2697 | 16.0k | case U':': add_trait(has_colon); break; |
2698 | 58.0k | } |
2699 | 58.0k | } |
2700 | 1.33M | } |
2701 | | |
2702 | 1.36M | advance_and_return_if_error(); |
2703 | 1.36M | advance_count++; |
2704 | 1.36M | eof_while_scanning = is_eof(); |
2705 | 1.36M | } |
2706 | 1.36M | while (advance_count < (utf8_buffered_reader::max_history_length - 1u) && !is_eof() |
2707 | 1.36M | && !is_value_terminator(*cp)); |
2708 | 856k | }; |
2709 | 854k | scan(); |
2710 | 854k | return_if_error({}); |
2711 | | |
2712 | | // force further scanning if this could have been a date-time with a space instead of a T |
2713 | 854k | if (char_count == 10u // |
2714 | 7.32k | && (traits | begins_zero) == (bzero_msk | has_minus) // |
2715 | 4.99k | && chars[4] == U'-' // |
2716 | 4.98k | && chars[7] == U'-' // |
2717 | 4.97k | && !is_eof() // |
2718 | 4.96k | && *cp == U' ') |
2719 | 3.50k | { |
2720 | 3.50k | const auto pre_advance_count = advance_count; |
2721 | 3.50k | const auto pre_scan_traits = traits; |
2722 | 3.50k | chars[char_count++] = *cp; |
2723 | 3.50k | add_trait(has_t); |
2724 | | |
2725 | 3.50k | const auto backpedal = [&]() noexcept |
2726 | 3.50k | { |
2727 | 931 | go_back(advance_count - pre_advance_count); |
2728 | 931 | advance_count = pre_advance_count; |
2729 | 931 | traits = pre_scan_traits; |
2730 | 931 | char_count = 10u; |
2731 | 931 | }; |
2732 | | |
2733 | 3.50k | advance_and_return_if_error({}); |
2734 | 3.50k | advance_count++; |
2735 | | |
2736 | 3.50k | if (is_eof() || !is_decimal_digit(*cp)) |
2737 | 931 | backpedal(); |
2738 | 2.56k | else |
2739 | 2.56k | { |
2740 | 2.56k | chars[char_count++] = *cp; |
2741 | | |
2742 | 2.56k | advance_and_return_if_error({}); |
2743 | 2.56k | advance_count++; |
2744 | | |
2745 | 2.56k | scan(); |
2746 | 2.56k | return_if_error({}); |
2747 | | |
2748 | 2.56k | if (char_count == 12u) |
2749 | 0 | backpedal(); |
2750 | 2.56k | } |
2751 | 3.50k | } |
2752 | | |
2753 | | // set the reader back to where we started |
2754 | 854k | go_back(advance_count); |
2755 | | |
2756 | | // if after scanning ahead we still only have one value character, |
2757 | | // the only valid value type is an integer. |
2758 | 854k | if (char_count == 1u) |
2759 | 798k | { |
2760 | 798k | if (has_any(begins_digit)) |
2761 | 798k | { |
2762 | 798k | val.reset(new value{ static_cast<int64_t>(chars[0] - U'0') }); |
2763 | 798k | advance(); // skip the digit |
2764 | 798k | break; |
2765 | 798k | } |
2766 | | |
2767 | | // anything else would be ambiguous. |
2768 | 3 | else |
2769 | 798k | set_error_and_return_default(eof_while_scanning ? "encountered end-of-file"sv |
2770 | 798k | : "could not determine value type"sv); |
2771 | 798k | } |
2772 | | |
2773 | | // now things that can be identified from two or more characters |
2774 | 56.0k | return_if_error({}); |
2775 | 56.0k | TOML_ASSERT_ASSUME(char_count >= 2u); |
2776 | | |
2777 | | // do some 'fuzzy matching' where there's no ambiguity, since that allows the specific |
2778 | | // typed parse functions to take over and show better diagnostics if there's an issue |
2779 | | // (as opposed to the fallback "could not determine type" message) |
2780 | 56.0k | if (has_any(has_p)) |
2781 | 6 | val.reset(new value{ parse_hex_float() }); |
2782 | 56.0k | else if (has_any(has_x | has_o | has_b)) |
2783 | 4.45k | { |
2784 | 4.45k | int64_t i; |
2785 | 4.45k | value_flags flags; |
2786 | 4.45k | if (has_any(has_x)) |
2787 | 2.04k | { |
2788 | 2.04k | i = parse_integer<16>(); |
2789 | 2.04k | flags = value_flags::format_as_hexadecimal; |
2790 | 2.04k | } |
2791 | 2.40k | else if (has_any(has_o)) |
2792 | 1.15k | { |
2793 | 1.15k | i = parse_integer<8>(); |
2794 | 1.15k | flags = value_flags::format_as_octal; |
2795 | 1.15k | } |
2796 | 1.25k | else // has_b |
2797 | 1.25k | { |
2798 | 1.25k | i = parse_integer<2>(); |
2799 | 1.25k | flags = value_flags::format_as_binary; |
2800 | 1.25k | } |
2801 | 4.45k | return_if_error({}); |
2802 | | |
2803 | 4.45k | val.reset(new value{ i }); |
2804 | 4.45k | val->ref_cast<int64_t>().flags(flags); |
2805 | 4.45k | } |
2806 | 51.5k | else if (has_any(has_e) || (has_any(begins_digit) && chars[1] == U'.')) |
2807 | 5.13k | val.reset(new value{ parse_float() }); |
2808 | 46.4k | else if (has_any(begins_sign)) |
2809 | 14.8k | { |
2810 | | // single-digit signed integers |
2811 | 14.8k | if (char_count == 2u && has_any(has_digits)) |
2812 | 2.55k | { |
2813 | 2.55k | val.reset(new value{ static_cast<int64_t>(chars[1] - U'0') * (chars[0] == U'-' ? -1LL : 1LL) }); |
2814 | 2.55k | advance(); // skip the sign |
2815 | 2.55k | advance(); // skip the digit |
2816 | 2.55k | break; |
2817 | 2.55k | } |
2818 | | |
2819 | | // simple signed floats (e.g. +1.0) |
2820 | 12.3k | if (is_decimal_digit(chars[1]) && chars[2] == U'.') |
2821 | 450 | val.reset(new value{ parse_float() }); |
2822 | | |
2823 | | // signed infinity or nan |
2824 | 11.8k | else if (is_match(chars[1], U'i', U'n', U'I', U'N')) |
2825 | 533 | val.reset(new value{ parse_inf_or_nan() }); |
2826 | 12.3k | } |
2827 | | |
2828 | 53.4k | return_if_error({}); |
2829 | 53.4k | if (val) |
2830 | 10.2k | break; |
2831 | | |
2832 | | // match trait masks against what they can match exclusively. |
2833 | | // all correct value parses will come out of this list, so doing this as a switch is likely to |
2834 | | // be a better friend to the optimizer on the success path (failure path can be slow but that |
2835 | | // doesn't matter much). |
2836 | 43.2k | switch (unwrap_enum(traits)) |
2837 | 43.2k | { |
2838 | | // binary integers |
2839 | | // 0b10 |
2840 | 0 | case bzero_msk | has_b: |
2841 | 0 | val.reset(new value{ parse_integer<2>() }); |
2842 | 0 | val->ref_cast<int64_t>().flags(value_flags::format_as_binary); |
2843 | 0 | break; |
2844 | | |
2845 | | // octal integers |
2846 | | // 0o10 |
2847 | 0 | case bzero_msk | has_o: |
2848 | 0 | val.reset(new value{ parse_integer<8>() }); |
2849 | 0 | val->ref_cast<int64_t>().flags(value_flags::format_as_octal); |
2850 | 0 | break; |
2851 | | |
2852 | | // decimal integers |
2853 | | // 00 |
2854 | | // 10 |
2855 | | // +10 |
2856 | | // -10 |
2857 | 30 | case bzero_msk: [[fallthrough]]; |
2858 | 21.1k | case bdigit_msk: [[fallthrough]]; |
2859 | 30.4k | case begins_sign | has_digits | has_minus: [[fallthrough]]; |
2860 | 31.4k | case begins_sign | has_digits | has_plus: |
2861 | 31.4k | { |
2862 | | // if the value was so long we exhausted the history buffer it's reasonable to assume |
2863 | | // there was more and the value's actual type is impossible to identify without making the |
2864 | | // buffer bigger (since it could have actually been a float), so emit an error. |
2865 | | // |
2866 | | // (this will likely only come up during fuzzing and similar scenarios) |
2867 | 31.4k | static constexpr size_t max_numeric_value_length = |
2868 | 31.4k | utf8_buffered_reader::max_history_length - 2u; |
2869 | 31.4k | if TOML_UNLIKELY(!eof_while_scanning && advance_count > max_numeric_value_length) |
2870 | 31.4k | set_error_and_return_default("numeric value too long to identify type - cannot exceed "sv, |
2871 | 31.4k | max_numeric_value_length, |
2872 | 31.4k | " characters"sv); |
2873 | | |
2874 | 31.4k | val.reset(new value{ parse_integer<10>() }); |
2875 | 31.4k | break; |
2876 | 31.4k | } |
2877 | | |
2878 | | // hexadecimal integers |
2879 | | // 0x10 |
2880 | 0 | case bzero_msk | has_x: |
2881 | 0 | val.reset(new value{ parse_integer<16>() }); |
2882 | 0 | val->ref_cast<int64_t>().flags(value_flags::format_as_hexadecimal); |
2883 | 0 | break; |
2884 | | |
2885 | | // decimal floats |
2886 | | // 0e1 |
2887 | | // 0e-1 |
2888 | | // 0e+1 |
2889 | | // 0.0 |
2890 | | // 0.0e1 |
2891 | | // 0.0e-1 |
2892 | | // 0.0e+1 |
2893 | 0 | case bzero_msk | has_e: [[fallthrough]]; |
2894 | 0 | case bzero_msk | has_e | has_minus: [[fallthrough]]; |
2895 | 0 | case bzero_msk | has_e | has_plus: [[fallthrough]]; |
2896 | 2 | case bzero_msk | has_dot: [[fallthrough]]; |
2897 | 2 | case bzero_msk | has_dot | has_e: [[fallthrough]]; |
2898 | 2 | case bzero_msk | has_dot | has_e | has_minus: [[fallthrough]]; |
2899 | 2 | case bzero_msk | has_dot | has_e | has_plus: [[fallthrough]]; |
2900 | | // 1e1 |
2901 | | // 1e-1 |
2902 | | // 1e+1 |
2903 | | // 1.0 |
2904 | | // 1.0e1 |
2905 | | // 1.0e-1 |
2906 | | // 1.0e+1 |
2907 | 2 | case bdigit_msk | has_e: [[fallthrough]]; |
2908 | 2 | case bdigit_msk | has_e | has_minus: [[fallthrough]]; |
2909 | 2 | case bdigit_msk | has_e | has_plus: [[fallthrough]]; |
2910 | 796 | case bdigit_msk | has_dot: [[fallthrough]]; |
2911 | 796 | case bdigit_msk | has_dot | has_e: [[fallthrough]]; |
2912 | 796 | case bdigit_msk | has_dot | has_e | has_minus: [[fallthrough]]; |
2913 | 796 | case bdigit_msk | has_dot | has_e | has_plus: [[fallthrough]]; |
2914 | | // +1e1 |
2915 | | // +1.0 |
2916 | | // +1.0e1 |
2917 | | // +1.0e+1 |
2918 | | // +1.0e-1 |
2919 | | // -1.0e+1 |
2920 | 796 | case begins_sign | has_digits | has_e | has_plus: [[fallthrough]]; |
2921 | 1.25k | case begins_sign | has_digits | has_dot | has_plus: [[fallthrough]]; |
2922 | 1.25k | case begins_sign | has_digits | has_dot | has_e | has_plus: [[fallthrough]]; |
2923 | 1.25k | case begins_sign | has_digits | has_dot | has_e | signs_msk: [[fallthrough]]; |
2924 | | // -1e1 |
2925 | | // -1e+1 |
2926 | | // +1e-1 |
2927 | | // -1.0 |
2928 | | // -1.0e1 |
2929 | | // -1.0e-1 |
2930 | 1.25k | case begins_sign | has_digits | has_e | has_minus: [[fallthrough]]; |
2931 | 1.25k | case begins_sign | has_digits | has_e | signs_msk: [[fallthrough]]; |
2932 | 1.84k | case begins_sign | has_digits | has_dot | has_minus: [[fallthrough]]; |
2933 | 1.84k | case begins_sign | has_digits | has_dot | has_e | has_minus: |
2934 | 1.84k | val.reset(new value{ parse_float() }); |
2935 | 1.84k | break; |
2936 | | |
2937 | | // hexadecimal floats |
2938 | | // 0x10p0 |
2939 | | // 0x10p-0 |
2940 | | // 0x10p+0 |
2941 | 0 | case bzero_msk | has_x | has_p: [[fallthrough]]; |
2942 | 0 | case bzero_msk | has_x | has_p | has_minus: [[fallthrough]]; |
2943 | 0 | case bzero_msk | has_x | has_p | has_plus: [[fallthrough]]; |
2944 | | // -0x10p0 |
2945 | | // -0x10p-0 |
2946 | | // +0x10p0 |
2947 | | // +0x10p+0 |
2948 | | // -0x10p+0 |
2949 | | // +0x10p-0 |
2950 | 0 | case begins_sign | has_digits | has_x | has_p | has_minus: [[fallthrough]]; |
2951 | 0 | case begins_sign | has_digits | has_x | has_p | has_plus: [[fallthrough]]; |
2952 | 0 | case begins_sign | has_digits | has_x | has_p | signs_msk: [[fallthrough]]; |
2953 | | // 0x10.1p0 |
2954 | | // 0x10.1p-0 |
2955 | | // 0x10.1p+0 |
2956 | 0 | case bzero_msk | has_x | has_dot | has_p: [[fallthrough]]; |
2957 | 0 | case bzero_msk | has_x | has_dot | has_p | has_minus: [[fallthrough]]; |
2958 | 0 | case bzero_msk | has_x | has_dot | has_p | has_plus: [[fallthrough]]; |
2959 | | // -0x10.1p0 |
2960 | | // -0x10.1p-0 |
2961 | | // +0x10.1p0 |
2962 | | // +0x10.1p+0 |
2963 | | // -0x10.1p+0 |
2964 | | // +0x10.1p-0 |
2965 | 0 | case begins_sign | has_digits | has_x | has_dot | has_p | has_minus: [[fallthrough]]; |
2966 | 0 | case begins_sign | has_digits | has_x | has_dot | has_p | has_plus: [[fallthrough]]; |
2967 | 0 | case begins_sign | has_digits | has_x | has_dot | has_p | signs_msk: |
2968 | 0 | val.reset(new value{ parse_hex_float() }); |
2969 | 0 | break; |
2970 | | |
2971 | | // times |
2972 | | // HH:MM |
2973 | | // HH:MM:SS |
2974 | | // HH:MM:SS.FFFFFF |
2975 | 547 | case bzero_msk | has_colon: [[fallthrough]]; |
2976 | 1.06k | case bzero_msk | has_colon | has_dot: [[fallthrough]]; |
2977 | 1.79k | case bdigit_msk | has_colon: [[fallthrough]]; |
2978 | 2.40k | case bdigit_msk | has_colon | has_dot: val.reset(new value{ parse_time() }); break; |
2979 | | |
2980 | | // local dates |
2981 | | // YYYY-MM-DD |
2982 | 595 | case bzero_msk | has_minus: [[fallthrough]]; |
2983 | 2.52k | case bdigit_msk | has_minus: val.reset(new value{ parse_date() }); break; |
2984 | | |
2985 | | // date-times |
2986 | | // YYYY-MM-DDTHH:MM |
2987 | | // YYYY-MM-DDTHH:MM-HH:MM |
2988 | | // YYYY-MM-DDTHH:MM+HH:MM |
2989 | | // YYYY-MM-DD HH:MM |
2990 | | // YYYY-MM-DD HH:MM-HH:MM |
2991 | | // YYYY-MM-DD HH:MM+HH:MM |
2992 | | // YYYY-MM-DDTHH:MM:SS |
2993 | | // YYYY-MM-DDTHH:MM:SS-HH:MM |
2994 | | // YYYY-MM-DDTHH:MM:SS+HH:MM |
2995 | | // YYYY-MM-DD HH:MM:SS |
2996 | | // YYYY-MM-DD HH:MM:SS-HH:MM |
2997 | | // YYYY-MM-DD HH:MM:SS+HH:MM |
2998 | 281 | case bzero_msk | has_minus | has_colon | has_t: [[fallthrough]]; |
2999 | 654 | case bzero_msk | signs_msk | has_colon | has_t: [[fallthrough]]; |
3000 | 1.59k | case bdigit_msk | has_minus | has_colon | has_t: [[fallthrough]]; |
3001 | 2.18k | case bdigit_msk | signs_msk | has_colon | has_t: [[fallthrough]]; |
3002 | | // YYYY-MM-DDTHH:MM:SS.FFFFFF |
3003 | | // YYYY-MM-DDTHH:MM:SS.FFFFFF-HH:MM |
3004 | | // YYYY-MM-DDTHH:MM:SS.FFFFFF+HH:MM |
3005 | | // YYYY-MM-DD HH:MM:SS.FFFFFF |
3006 | | // YYYY-MM-DD HH:MM:SS.FFFFFF-HH:MM |
3007 | | // YYYY-MM-DD HH:MM:SS.FFFFFF+HH:MM |
3008 | 2.40k | case bzero_msk | has_minus | has_colon | has_dot | has_t: [[fallthrough]]; |
3009 | 2.63k | case bzero_msk | signs_msk | has_colon | has_dot | has_t: [[fallthrough]]; |
3010 | 2.94k | case bdigit_msk | has_minus | has_colon | has_dot | has_t: [[fallthrough]]; |
3011 | 3.24k | case bdigit_msk | signs_msk | has_colon | has_dot | has_t: [[fallthrough]]; |
3012 | | // YYYY-MM-DDTHH:MMZ |
3013 | | // YYYY-MM-DD HH:MMZ |
3014 | | // YYYY-MM-DDTHH:MM:SSZ |
3015 | | // YYYY-MM-DD HH:MM:SSZ |
3016 | | // YYYY-MM-DDTHH:MM:SS.FFFFFFZ |
3017 | | // YYYY-MM-DD HH:MM:SS.FFFFFFZ |
3018 | 3.60k | case bzero_msk | has_minus | has_colon | has_z | has_t: [[fallthrough]]; |
3019 | 4.08k | case bzero_msk | has_minus | has_colon | has_dot | has_z | has_t: [[fallthrough]]; |
3020 | 4.36k | case bdigit_msk | has_minus | has_colon | has_z | has_t: [[fallthrough]]; |
3021 | 4.56k | case bdigit_msk | has_minus | has_colon | has_dot | has_z | has_t: |
3022 | 4.56k | val.reset(new value{ parse_date_time() }); |
3023 | 4.56k | break; |
3024 | 43.2k | } |
3025 | 43.2k | } |
3026 | 876k | while (false); |
3027 | | |
3028 | 875k | if (!val) |
3029 | 195 | { |
3030 | 195 | set_error_at(begin_pos, "could not determine value type"sv); |
3031 | 195 | return_after_error({}); |
3032 | 195 | } |
3033 | | |
3034 | 875k | val->source_ = { begin_pos, current_position(1), reader.source_path() }; |
3035 | 875k | return val; |
3036 | 875k | } |
3037 | | |
3038 | | /* Parses one key (bare, quoted, or dotted) starting at the current code point. key_buffer is cleared and then receives each segment together with its begin/end positions; the function returns true once the last segment has been stored. */ TOML_NEVER_INLINE |
3039 | | bool parse_key() |
3040 | 103k | { |
3041 | 103k | return_if_error({}); |
3042 | 103k | assert_not_eof(); |
3043 | 103k | TOML_ASSERT_ASSUME(is_bare_key_character(*cp) || is_string_delimiter(*cp)); |
3044 | 103k | push_parse_scope("key"sv); |
3045 | | |
3046 | 103k | key_buffer.clear(); |
3047 | 103k | recording_whitespace = false; |
3048 | | |
3049 | 697k | while (!is_error()) |
3050 | 697k | { |
3051 | 697k | std::string_view key_segment; |
3052 | 697k | const auto key_begin = current_position(); |
3053 | | |
3054 | | // bare key segment: delegated to parse_bare_key_segment() |
3055 | 697k | if (is_bare_key_character(*cp)) |
3056 | 693k | key_segment = parse_bare_key_segment(); |
3057 | | |
3058 | | // "quoted key segment": parsed as a string; a multi-line string is rejected below |
3059 | 3.81k | else if (is_string_delimiter(*cp)) |
3060 | 3.81k | { |
3061 | 3.81k | const auto begin_pos = cp->position; |
3062 | | |
3063 | 3.81k | recording_whitespace = true; /* presumably keeps whitespace inside the quotes in the recording - confirm */ |
3064 | 3.81k | parsed_string str = parse_string(); |
3065 | 3.81k | recording_whitespace = false; |
3066 | 3.81k | return_if_error({}); |
3067 | | |
3068 | 3.81k | if (str.was_multi_line) |
3069 | 12 | { |
3070 | 12 | set_error_at(begin_pos, |
3071 | 12 | "multi-line strings are prohibited in "sv, |
3072 | 12 | key_buffer.empty() ? ""sv : "dotted "sv, |
3073 | 12 | "keys"sv); |
3074 | 12 | return_after_error({}); |
3075 | 12 | } |
3076 | 3.79k | else |
3077 | 3.79k | key_segment = str.value; |
3078 | 3.81k | } |
3079 | | |
3080 | | // neither a bare-key character nor a string delimiter: report an error |
3081 | 5 | else |
3082 | 5 | set_error_and_return_default("expected bare key starting character or string delimiter, saw '"sv, |
3083 | 697k | to_sv(*cp), |
3084 | 697k | "'"sv); |
3085 | | |
3086 | 697k | const auto key_end = current_position(); |
3087 | | |
3088 | | // whitespace following the key segment (key_end was taken before this, so it excludes that whitespace) |
3089 | 697k | consume_leading_whitespace(); |
3090 | | |
3091 | | // store the segment text together with its begin/end positions |
3092 | 697k | key_buffer.push_back(key_segment, key_begin, key_end); |
3093 | | |
3094 | 697k | if TOML_UNLIKELY(key_buffer.size() > max_dotted_keys_depth) |
3095 | 697k | set_error_and_return_default("exceeded maximum dotted keys depth of "sv, |
3096 | 697k | max_dotted_keys_depth, |
3097 | 697k | " (TOML_MAX_DOTTED_KEYS_DEPTH)"sv); |
3098 | | |
3099 | | // eof or no more key to come |
3100 | 697k | if (is_eof() || *cp != U'.') |
3101 | 102k | break; |
3102 | | |
3103 | | // was a dotted key - skip the '.', any whitespace after it, and go around again |
3104 | 594k | advance_and_return_if_error_or_eof({}); |
3105 | 594k | consume_leading_whitespace(); |
3106 | 594k | set_error_and_return_if_eof({}); |
3107 | 594k | } |
3108 | 103k | return_if_error({}); |
3109 | | |
3110 | 103k | return true; |
3111 | 103k | } |
3112 | | |
3113 | | /* Builds a key object from segment `segment_index` of key_buffer; its source region spans that segment's recorded start and end positions and uses the root table's source path. */ TOML_NODISCARD |
3114 | | key make_key(size_t segment_index) const |
3115 | 590k | { |
3116 | 590k | TOML_ASSERT(key_buffer.size() > segment_index); |
3117 | | |
3118 | 590k | return key{ |
3119 | 590k | key_buffer[segment_index], |
3120 | 590k | source_region{ key_buffer.starts[segment_index], key_buffer.ends[segment_index], root.source().path } |
3121 | 590k | }; |
3122 | 590k | } |
3123 | | |
3124 | | /* Parses a [table] or [[array-of-tables]] header at the current '[', walks or implicitly creates each parent table named by the dotted key (starting from root), and returns the table that the header designates. Redefinitions are reported as errors. */ TOML_NODISCARD |
3125 | | TOML_NEVER_INLINE |
3126 | | table* parse_table_header() |
3127 | 67.4k | { |
3128 | 67.4k | return_if_error({}); |
3129 | 67.4k | assert_not_eof(); |
3130 | 67.4k | TOML_ASSERT_ASSUME(*cp == U'['); |
3131 | 67.4k | push_parse_scope("table header"sv); |
3132 | | |
3133 | 67.4k | const source_position header_begin_pos = cp->position; |
3134 | 67.4k | source_position header_end_pos; |
3135 | 67.4k | bool is_arr = false; |
3136 | | |
3137 | | // parse header: opening bracket(s), key, closing bracket(s), then the rest of the line |
3138 | 67.4k | { |
3139 | | // skip first '[' |
3140 | 67.4k | advance_and_return_if_error_or_eof({}); |
3141 | | |
3142 | | // skip past any whitespace that followed the '[' |
3143 | 67.4k | const bool had_leading_whitespace = consume_leading_whitespace(); |
3144 | 67.4k | set_error_and_return_if_eof({}); |
3145 | | |
3146 | | // skip second '[' (if present) |
3147 | 67.4k | if (*cp == U'[') |
3148 | 59.1k | { |
3149 | 59.1k | if (had_leading_whitespace) |
3150 | 59.1k | set_error_and_return_default( |
3151 | 59.1k | "[[array-of-table]] brackets must be contiguous (i.e. [ [ this ] ] is prohibited)"sv); |
3152 | | |
3153 | 59.1k | is_arr = true; |
3154 | 59.1k | advance_and_return_if_error_or_eof({}); |
3155 | | |
3156 | | // skip past any whitespace that followed the '[' |
3157 | 59.1k | consume_leading_whitespace(); |
3158 | 59.1k | set_error_and_return_if_eof({}); |
3159 | 59.1k | } |
3160 | | |
3161 | | // check for a premature closing ']' |
3162 | 67.4k | if (*cp == U']') |
3163 | 67.4k | set_error_and_return_default("tables with blank bare keys are explicitly prohibited"sv); |
3164 | | |
3165 | 67.4k | if (!is_bare_key_character(*cp) && !is_string_delimiter(*cp)) |
3166 | 67.4k | set_error_and_return_default("expected bare key starting character or string delimiter, saw '"sv, |
3167 | 67.3k | to_sv(*cp), |
3168 | 67.3k | "'"sv); |
3169 | | |
3170 | | // get the actual key; it is recorded so the errors below can quote it via recording_buffer |
3171 | 67.3k | start_recording(); |
3172 | 67.3k | parse_key(); |
3173 | 67.3k | stop_recording(1u); |
3174 | 67.3k | return_if_error({}); |
3175 | | |
3176 | | // skip past any whitespace that followed the key |
3177 | 67.3k | consume_leading_whitespace(); |
3178 | 67.3k | return_if_error({}); |
3179 | 67.3k | set_error_and_return_if_eof({}); |
3180 | | |
3181 | | // consume the closing ']' (a second one is required when is_arr is set) |
3182 | 67.3k | if (*cp != U']') |
3183 | 67.3k | set_error_and_return_default("expected ']', saw '"sv, to_sv(*cp), "'"sv); |
3184 | 67.3k | if (is_arr) |
3185 | 59.1k | { |
3186 | 59.1k | advance_and_return_if_error_or_eof({}); |
3187 | 59.1k | if (*cp != U']') |
3188 | 59.1k | set_error_and_return_default("expected ']', saw '"sv, to_sv(*cp), "'"sv); |
3189 | 59.1k | } |
3190 | 67.3k | advance_and_return_if_error({}); |
3191 | 67.3k | header_end_pos = current_position(1); |
3192 | | |
3193 | | // handle the rest of the line after the header: only whitespace, a comment or a line break may follow |
3194 | 67.3k | consume_leading_whitespace(); |
3195 | 67.3k | if (!is_eof() && !consume_comment() && !consume_line_break()) |
3196 | 67.3k | set_error_and_return_default("expected a comment or whitespace, saw '"sv, to_sv(cp), "'"sv); |
3197 | 67.3k | } |
3198 | 67.3k | TOML_ASSERT(!key_buffer.empty()); |
3199 | | |
3200 | | // check if each parent is a table/table array, or can be created implicitly as a table. |
3201 | 67.3k | table* parent = &root; |
3202 | 192k | for (size_t i = 0, e = key_buffer.size() - 1u; i < e; i++) |
3203 | 125k | { |
3204 | 125k | const std::string_view segment = key_buffer[i]; |
3205 | 125k | auto pit = parent->lower_bound(segment); |
3206 | | |
3207 | | // parent already existed |
3208 | 125k | if (pit != parent->end() && pit->first == segment) |
3209 | 81.8k | { |
3210 | 81.8k | node& p = pit->second; |
3211 | | |
3212 | 81.8k | if (auto tbl = p.as_table()) |
3213 | 31.1k | { |
3214 | | // adding to closed inline tables is illegal |
3215 | 31.1k | if (tbl->is_inline() && !impl::find(open_inline_tables.begin(), open_inline_tables.end(), tbl)) |
3216 | 31.1k | set_error_and_return_default("cannot insert '"sv, |
3217 | 31.1k | to_sv(recording_buffer), |
3218 | 31.1k | "' into existing inline table"sv); |
3219 | | |
3220 | 31.1k | parent = tbl; |
3221 | 31.1k | } |
3222 | 50.6k | else if (auto arr = p.as_array(); arr && impl::find(table_arrays.begin(), table_arrays.end(), arr)) |
3223 | 50.6k | { |
3224 | | // table arrays are a special case; |
3225 | | // the spec dictates we select the most recently declared element in the array. |
3226 | 50.6k | TOML_ASSERT(!arr->empty()); |
3227 | 50.6k | TOML_ASSERT(arr->back().is_table()); |
3228 | 50.6k | parent = &arr->back().ref_cast<table>(); |
3229 | 50.6k | } |
3230 | 13 | else |
3231 | 13 | { |
3232 | 13 | if (!is_arr && p.type() == node_type::table) |
3233 | 13 | set_error_and_return_default("cannot redefine existing table '"sv, |
3234 | 13 | to_sv(recording_buffer), |
3235 | 13 | "'"sv); |
3236 | 13 | else |
3237 | 13 | set_error_and_return_default("cannot redefine existing "sv, |
3238 | 13 | to_sv(p.type()), |
3239 | 13 | " '"sv, |
3240 | 13 | to_sv(recording_buffer), |
3241 | 13 | "' as "sv, |
3242 | 13 | is_arr ? "array-of-tables"sv : "table"sv); |
3243 | 13 | } |
3244 | 81.8k | } |
3245 | | |
3246 | | // need to create a new implicit table; it is tracked in implicit_tables so a later header can claim it |
3247 | 43.5k | else |
3248 | 43.5k | { |
3249 | 43.5k | pit = parent->emplace_hint<table>(pit, make_key(i)); |
3250 | 43.5k | table& p = pit->second.ref_cast<table>(); |
3251 | 43.5k | p.source_ = { header_begin_pos, header_end_pos, reader.source_path() }; |
3252 | | |
3253 | 43.5k | implicit_tables.push_back(&p); |
3254 | 43.5k | parent = &p; |
3255 | 43.5k | } |
3256 | 125k | } |
3257 | | |
3258 | 67.2k | const auto last_segment = key_buffer.back(); |
3259 | 67.2k | auto it = parent->lower_bound(last_segment); |
3260 | | |
3261 | | // if there was already a matching node some sanity checking is necessary; |
3262 | | // this is ok if we're making an array and the existing element is already an array (new element) |
3263 | | // or if we're making a table and the existing element is an implicitly-created table (promote it), |
3264 | | // otherwise this is a redefinition error. |
3265 | 67.2k | if (it != parent->end() && it->first == last_segment) |
3266 | 14.8k | { |
3267 | 14.8k | node& matching_node = it->second; |
3268 | 14.8k | if (auto arr = matching_node.as_array(); |
3269 | 14.8k | is_arr && arr && impl::find(table_arrays.begin(), table_arrays.end(), arr)) |
3270 | 11.4k | { |
3271 | 11.4k | table& tbl = arr->emplace_back<table>(); |
3272 | 11.4k | tbl.source_ = { header_begin_pos, header_end_pos, reader.source_path() }; |
3273 | 11.4k | return &tbl; |
3274 | 11.4k | } |
3275 | | |
3276 | 3.40k | else if (auto tbl = matching_node.as_table(); !is_arr && tbl && !implicit_tables.empty()) |
3277 | 3.38k | { |
3278 | 3.38k | if (auto found = impl::find(implicit_tables.begin(), implicit_tables.end(), tbl); found) |
3279 | 3.38k | { |
3280 | 3.38k | bool ok = true; /* stays true only if every existing child is a table or an array-of-tables */ |
3281 | 3.38k | if (!tbl->empty()) |
3282 | 3.38k | { |
3283 | 3.38k | for (auto& [_, child] : *tbl) |
3284 | 31.2k | { |
3285 | 31.2k | if (!child.is_table() && !child.is_array_of_tables()) |
3286 | 10 | { |
3287 | 10 | ok = false; |
3288 | 10 | break; |
3289 | 10 | } |
3290 | 31.2k | } |
3291 | 3.38k | } |
3292 | | |
3293 | 3.38k | if (ok) |
3294 | 3.37k | { |
3295 | 3.37k | implicit_tables.erase(implicit_tables.cbegin() + (found - implicit_tables.data())); /* no longer implicit: this header now defines it */ |
3296 | 3.37k | tbl->source_.begin = header_begin_pos; |
3297 | 3.37k | tbl->source_.end = header_end_pos; |
3298 | 3.37k | return tbl; |
3299 | 3.37k | } |
3300 | 3.38k | } |
3301 | 3.38k | } |
3302 | | |
3303 | | // if we get here it's a redefinition error. |
3304 | 33 | if (!is_arr && matching_node.type() == node_type::table) |
3305 | 20 | { |
3306 | 20 | set_error_at(header_begin_pos, |
3307 | 20 | "cannot redefine existing table '"sv, |
3308 | 20 | to_sv(recording_buffer), |
3309 | 20 | "'"sv); |
3310 | 20 | return_after_error({}); |
3311 | 20 | } |
3312 | 13 | else |
3313 | 13 | { |
3314 | 13 | set_error_at(header_begin_pos, |
3315 | 13 | "cannot redefine existing "sv, |
3316 | 13 | to_sv(matching_node.type()), |
3317 | 13 | " '"sv, |
3318 | 13 | to_sv(recording_buffer), |
3319 | 13 | "' as "sv, |
3320 | 13 | is_arr ? "array-of-tables"sv : "table"sv); |
3321 | 13 | return_after_error({}); |
3322 | 13 | } |
3323 | 33 | } |
3324 | | |
3325 | | // there was no matching node, sweet - we can freely instantiate a new table/table array. |
3326 | 52.4k | else |
3327 | 52.4k | { |
3328 | 52.4k | auto last_key = make_key(key_buffer.size() - 1u); |
3329 | | |
3330 | | // if it's an array we need to make the array and its first table element, |
3331 | | // set the starting regions, and return the table element |
3332 | 52.4k | if (is_arr) |
3333 | 47.6k | { |
3334 | 47.6k | it = parent->emplace_hint<array>(it, std::move(last_key)); |
3335 | 47.6k | array& tbl_arr = it->second.ref_cast<array>(); |
3336 | 47.6k | table_arrays.push_back(&tbl_arr); |
3337 | 47.6k | tbl_arr.source_ = { header_begin_pos, header_end_pos, reader.source_path() }; |
3338 | | |
3339 | 47.6k | table& tbl = tbl_arr.emplace_back<table>(); |
3340 | 47.6k | tbl.source_ = { header_begin_pos, header_end_pos, reader.source_path() }; |
3341 | 47.6k | return &tbl; |
3342 | 47.6k | } |
3343 | | |
3344 | | // otherwise we're just making a table |
3345 | 4.76k | else |
3346 | 4.76k | { |
3347 | 4.76k | it = parent->emplace_hint<table>(it, std::move(last_key)); |
3348 | 4.76k | table& tbl = it->second.ref_cast<table>(); |
3349 | 4.76k | tbl.source_ = { header_begin_pos, header_end_pos, reader.source_path() }; |
3350 | 4.76k | return &tbl; |
3351 | 4.76k | } |
3352 | 52.4k | } |
3353 | 67.2k | } |
3354 | | |
3355 | | /* Parses `key = value` at the current position and inserts the result into *tbl. For a dotted key the intermediate tables are looked up or created first and the value is inserted into the innermost one. Returns true on success. */ TOML_NEVER_INLINE |
3356 | | bool parse_key_value_pair_and_insert(table* tbl) |
3357 | 35.6k | { |
3358 | 35.6k | return_if_error({}); |
3359 | 35.6k | assert_not_eof(); |
3360 | 35.6k | TOML_ASSERT_ASSUME(is_string_delimiter(*cp) || is_bare_key_character(*cp)); |
3361 | 35.6k | push_parse_scope("key-value pair"sv); |
3362 | | |
3363 | | // read the key into the key buffer (recorded so the redefinition error below can quote it) |
3364 | 35.6k | start_recording(); |
3365 | 35.6k | parse_key(); |
3366 | 35.6k | stop_recording(1u); |
3367 | 35.6k | return_if_error({}); |
3368 | 35.6k | TOML_ASSERT(key_buffer.size() >= 1u); |
3369 | | |
3370 | | // skip past any whitespace that followed the key |
3371 | 35.6k | consume_leading_whitespace(); |
3372 | 35.6k | set_error_and_return_if_eof({}); |
3373 | | |
3374 | | // '=' |
3375 | 35.5k | if (*cp != U'=') |
3376 | 35.5k | set_error_and_return_default("expected '=', saw '"sv, to_sv(*cp), "'"sv); |
3377 | 35.4k | advance_and_return_if_error_or_eof({}); |
3378 | | |
3379 | | // skip past any whitespace that followed the '=' |
3380 | 35.4k | consume_leading_whitespace(); |
3381 | 35.4k | return_if_error({}); |
3382 | 35.4k | set_error_and_return_if_eof({}); |
3383 | | |
3384 | | // check that the next character could actually be a value |
3385 | 35.4k | if (is_value_terminator(*cp)) |
3386 | 35.4k | set_error_and_return_default("expected value, saw '"sv, to_sv(*cp), "'"sv); |
3387 | | |
3388 | | // if it's a dotted kvp we need to spawn the parent sub-tables if necessary, |
3389 | | // and set the target table to the second-to-last one in the chain |
3390 | 35.4k | if (key_buffer.size() > 1u) |
3391 | 3.65k | { |
3392 | 465k | for (size_t i = 0; i < key_buffer.size() - 1u; i++) |
3393 | 461k | { |
3394 | 461k | const std::string_view segment = key_buffer[i]; |
3395 | 461k | auto pit = tbl->lower_bound(segment); |
3396 | | |
3397 | | // parent already existed |
3398 | 461k | if (pit != tbl->end() && pit->first == segment) |
3399 | 1.40k | { |
3400 | 1.40k | table* p = pit->second.as_table(); |
3401 | | |
3402 | | // redefinition: the existing node is not a table, or is a table found in neither dotted_key_tables nor implicit_tables |
3403 | 1.40k | if TOML_UNLIKELY(!p |
3404 | 1.40k | || !(impl::find(dotted_key_tables.begin(), dotted_key_tables.end(), p) |
3405 | 1.40k | || impl::find(implicit_tables.begin(), implicit_tables.end(), p))) |
3406 | 12 | { |
3407 | 12 | set_error_at(key_buffer.starts[i], |
3408 | 12 | "cannot redefine existing "sv, |
3409 | 12 | to_sv(pit->second.type()), |
3410 | 12 | " as dotted key-value pair"sv); |
3411 | 12 | return_after_error({}); |
3412 | 12 | } |
3413 | | |
3414 | 1.39k | tbl = p; |
3415 | 1.39k | } |
3416 | | |
3417 | | // need to create a new implicit table and register it in dotted_key_tables |
3418 | 460k | else |
3419 | 460k | { |
3420 | 460k | pit = tbl->emplace_hint<table>(pit, make_key(i)); |
3421 | 460k | table& p = pit->second.ref_cast<table>(); |
3422 | 460k | p.source_ = pit->first.source(); |
3423 | | |
3424 | 460k | dotted_key_tables.push_back(&p); |
3425 | 460k | tbl = &p; |
3426 | 460k | } |
3427 | 461k | } |
3428 | 3.65k | } |
3429 | | |
3430 | | // ensure this isn't a redefinition; `it` is also the insertion hint used once the value has been parsed |
3431 | 35.4k | const std::string_view last_segment = key_buffer.back(); |
3432 | 35.4k | auto it = tbl->lower_bound(last_segment); |
3433 | 35.4k | if (it != tbl->end() && it->first == last_segment) |
3434 | 11 | { |
3435 | 11 | set_error("cannot redefine existing "sv, |
3436 | 11 | to_sv(it->second.type()), |
3437 | 11 | " '"sv, |
3438 | 11 | to_sv(recording_buffer), |
3439 | 11 | "'"sv); |
3440 | 11 | return_after_error({}); |
3441 | 11 | } |
3442 | | |
3443 | | // create the key first since the key buffer will likely get overwritten during value parsing (inline |
3444 | | // tables) |
3445 | 35.4k | auto last_key = make_key(key_buffer.size() - 1u); |
3446 | | |
3447 | | // now we can actually parse the value |
3448 | 35.4k | node_ptr val = parse_value(); |
3449 | 35.4k | return_if_error({}); |
3450 | | |
3451 | 35.4k | tbl->emplace_hint<node_ptr>(it, std::move(last_key), std::move(val)); |
3452 | 35.4k | return true; |
3453 | 35.4k | } |
3454 | | |
3455 | | /* Top-level parse loop: until end of input, each pass consumes whitespace, a line break or a comment, or a table header (which switches current_table), or a key-value pair inserted into current_table; anything else is reported as an error. Afterwards the root table's region end is set. */ void parse_document() |
3456 | 6.24k | { |
3457 | 6.24k | assert_not_error(); |
3458 | 6.24k | assert_not_eof(); |
3459 | 6.24k | push_parse_scope("root table"sv); |
3460 | | |
3461 | 6.24k | table* current_table = &root; |
3462 | | |
3463 | 6.24k | do |
3464 | 103k | { |
3465 | 103k | return_if_error(); |
3466 | | |
3467 | | // leading whitespace, line endings, comments |
3468 | 103k | if (consume_leading_whitespace() || consume_line_break() || consume_comment()) |
3469 | 8.44k | continue; |
3470 | 95.4k | return_if_error(); |
3471 | | |
3472 | | // [tables] |
3473 | | // [[table array]] |
3474 | 95.4k | if (*cp == U'[') |
3475 | 67.4k | current_table = parse_table_header(); |
3476 | | |
3477 | | // bare_keys |
3478 | | // dotted.keys |
3479 | | // "quoted keys" |
3480 | 28.0k | else if (is_bare_key_character(*cp) || is_string_delimiter(*cp)) |
3481 | 27.8k | { |
3482 | 27.8k | push_parse_scope("key-value pair"sv); |
3483 | | |
3484 | 27.8k | parse_key_value_pair_and_insert(current_table); |
3485 | | |
3486 | | // handle the rest of the line after the kvp |
3487 | | // (this is not done in parse_key_value_pair_and_insert() because that is also used for inline tables) |
3488 | 27.8k | consume_leading_whitespace(); |
3489 | 27.8k | return_if_error(); |
3490 | 27.8k | if (!is_eof() && !consume_comment() && !consume_line_break()) |
3491 | 18 | set_error("expected a comment or whitespace, saw '"sv, to_sv(cp), "'"sv); |
3492 | 27.8k | } |
3493 | | |
3494 | 181 | else // anything else cannot start a key, table header, whitespace or comment |
3495 | 181 | set_error("expected keys, tables, whitespace or comments, saw '"sv, to_sv(cp), "'"sv); |
3496 | 95.4k | } |
3497 | 103k | while (!is_eof()); |
3498 | | |
3499 | 6.24k | auto eof_pos = current_position(1); /* used as the region end of root, and of the last table if its end does not lie after its begin */ |
3500 | 6.24k | root.source_.end = eof_pos; |
3501 | 6.24k | if (current_table && current_table != &root && current_table->source_.end <= current_table->source_.begin) |
3502 | 0 | current_table->source_.end = eof_pos; |
3503 | 6.24k | } |
3504 | | |
3505 | | /* Recursively widens the source-region end of container nodes so that each non-inline table and each array ends no earlier than the furthest end among its children. Nodes whose type compares greater than node_type::array are left untouched (presumably the plain value types - confirm against the node_type enum). */ static void update_region_ends(node& nde) noexcept |
3506 | 1.10M | { |
3507 | 1.10M | const auto type = nde.type(); |
3508 | 1.10M | if (type > node_type::array) |
3509 | 820k | return; |
3510 | | |
3511 | 284k | if (type == node_type::table) |
3512 | 245k | { |
3513 | 245k | auto& tbl = nde.ref_cast<table>(); |
3514 | 245k | if (tbl.is_inline()) // inline tables (and all their inline descendants) are already correctly |
3515 | | // terminated |
3516 | 793 | return; |
3517 | | |
3518 | 245k | auto end = nde.source_.end; |
3519 | 245k | for (auto&& [k, v] : tbl) |
3520 | 255k | { |
3521 | 255k | TOML_UNUSED(k); |
3522 | 255k | update_region_ends(v); // children first, so their own ends are final before being compared |
3523 | 255k | if (end < v.source_.end) |
3524 | 167k | end = v.source_.end; |
3525 | 255k | } |
     | | nde.source_.end = end; // store the widened end, mirroring the array branch (previously computed but never written back) |
3526 | 245k | } |
3527 | 38.3k | else // arrays |
3528 | 38.3k | { |
3529 | 38.3k | auto& arr = nde.ref_cast<array>(); |
3530 | 38.3k | auto end = nde.source_.end; |
3531 | 38.3k | for (auto&& v : arr) |
3532 | 844k | { |
3533 | 844k | update_region_ends(v); // children first, so their own ends are final before being compared |
3534 | 844k | if (end < v.source_.end) |
3535 | 6.57k | end = v.source_.end; |
3536 | 844k | } |
3537 | 38.3k | nde.source_.end = end; |
3538 | 38.3k | } |
3539 | 284k | } |
3540 | | |
3541 | | public: |
3542 | | parser(utf8_reader_interface&& reader_) // reads the first code point, parses the whole document if there is one, then fixes up region ends |
3543 | 6.35k | : reader{ reader_ } |
3544 | 6.35k | { |
3545 | 6.35k | root.source_ = { prev_pos, prev_pos, reader.source_path() }; |
3546 | | |
3547 | 6.35k | if (!reader.peek_eof()) |
3548 | 6.30k | { |
3549 | 6.30k | cp = reader.read_next(); |
3550 | | /* without exceptions, a reader failure is moved into err and construction stops here */ |
3551 | | #if !TOML_EXCEPTIONS |
3552 | | if (reader.error()) |
3553 | | { |
3554 | | err = std::move(reader.error()); |
3555 | | return; |
3556 | | } |
3557 | | #endif |
3558 | | |
3559 | 6.30k | if (cp) |
3560 | 6.24k | parse_document(); |
3561 | 6.30k | } |
3562 | | /* runs whether or not anything was parsed */ |
3563 | 6.35k | update_region_ends(root); |
3564 | 6.35k | } |
3565 | | |
3566 | | /* Converts the finished parser (rvalue only) into a parse_result by moving its state out. */ TOML_NODISCARD |
3567 | | operator parse_result() && noexcept |
3568 | 3.48k | { |
3569 | 3.48k | #if TOML_EXCEPTIONS |
3570 | | /* exceptions build: no error is returned from here, the result is always the root table */ |
3571 | 3.48k | return { std::move(root) }; |
3572 | | |
3573 | | #else |
3574 | | /* no-exceptions build: the stored error takes precedence over the root table */ |
3575 | | if (err) |
3576 | | return parse_result{ *std::move(err) }; |
3577 | | else |
3578 | | return parse_result{ std::move(root) }; |
3579 | | |
3580 | | #endif |
3581 | 3.48k | } |
3582 | | }; |
3583 | | |
3584 | | /* Parses an array starting at '[': comma-separated values up to the closing ']', with whitespace, line breaks and comments allowed between tokens. A comma is only accepted directly after a value; a comma before the closing ']' is accepted. Returns the new array node. */ TOML_EXTERNAL_LINKAGE |
3585 | | node_ptr parser::parse_array() |
3586 | 4.93k | { |
3587 | 4.93k | return_if_error({}); |
3588 | 4.93k | assert_not_eof(); |
3589 | 4.93k | TOML_ASSERT_ASSUME(*cp == U'['); |
3590 | 4.93k | push_parse_scope("array"sv); |
3591 | | |
3592 | | // skip opening '[' |
3593 | 4.93k | advance_and_return_if_error_or_eof({}); |
3594 | | |
3595 | 4.91k | node_ptr arr_ptr{ new array{} }; |
3596 | 4.91k | array& arr = arr_ptr->ref_cast<array>(); |
3597 | 4.91k | enum class TOML_CLOSED_ENUM parse_type : int |
3598 | 4.91k | { |
3599 | 4.91k | none, |
3600 | 4.91k | comma, |
3601 | 4.91k | val |
3602 | 4.91k | }; |
3603 | 4.91k | parse_type prev = parse_type::none; /* kind of the previously consumed token; drives the comma/value alternation checks */ |
3604 | | |
3605 | 1.68M | while (!is_error()) |
3606 | 1.68M | { |
3607 | 1.70M | while (consume_leading_whitespace() || consume_line_break() || consume_comment()) |
3608 | 26.0k | continue; |
3609 | 1.68M | set_error_and_return_if_eof({}); |
3610 | | |
3611 | | // commas - only legal after a value |
3612 | 1.68M | if (*cp == U',') |
3613 | 837k | { |
3614 | 837k | if (prev == parse_type::val) |
3615 | 837k | { |
3616 | 837k | prev = parse_type::comma; |
3617 | 837k | advance_and_return_if_error_or_eof({}); |
3618 | 837k | continue; |
3619 | 837k | } |
3620 | 1 | set_error_and_return_default("expected value or closing ']', saw comma"sv); |
3621 | 1 | } |
3622 | | |
3623 | | // closing ']' (accepted whatever the previous token was) |
3624 | 845k | else if (*cp == U']') |
3625 | 3.31k | { |
3626 | 3.31k | advance_and_return_if_error({}); |
3627 | 3.31k | break; |
3628 | 3.31k | } |
3629 | | |
3630 | | // must be a value; two values in a row without a comma are an error |
3631 | 841k | else |
3632 | 841k | { |
3633 | 841k | if (prev == parse_type::val) |
3634 | 21 | { |
3635 | 21 | set_error_and_return_default("expected comma or closing ']', saw '"sv, to_sv(*cp), "'"sv); |
3636 | 0 | continue; |
3637 | 21 | } |
3638 | 841k | prev = parse_type::val; |
3639 | | |
3640 | 841k | auto val = parse_value(); |
3641 | 841k | return_if_error({}); |
3642 | | |
3643 | 841k | if (!arr.capacity()) /* nothing allocated yet: reserve room for four elements up front */ |
3644 | 2.96k | arr.reserve(4u); |
3645 | 841k | arr.emplace_back<node_ptr>(std::move(val)); |
3646 | 841k | } |
3647 | 1.68M | } |
3648 | | |
3649 | 4.78k | return_if_error({}); |
3650 | 4.78k | return arr_ptr; |
3651 | 4.91k | } |
3652 | | |
3653 | | /* Parses an inline table starting at '{': comma-separated key-value pairs up to the closing '}'. Unless TOML_LANG_UNRELEASED is set, only whitespace (no line breaks or comments) is skipped between tokens and a comma directly before '}' is an error. Returns the new table node, marked inline. */ TOML_EXTERNAL_LINKAGE |
3654 | | node_ptr parser::parse_inline_table() |
3655 | 7.56k | { |
3656 | 7.56k | return_if_error({}); |
3657 | 7.56k | assert_not_eof(); |
3658 | 7.56k | TOML_ASSERT_ASSUME(*cp == U'{'); |
3659 | 7.56k | push_parse_scope("inline table"sv); |
3660 | | |
3661 | | // skip opening '{' |
3662 | 7.56k | advance_and_return_if_error_or_eof({}); |
3663 | | |
3664 | 7.54k | node_ptr tbl_ptr{ new table{} }; |
3665 | 7.54k | table& tbl = tbl_ptr->ref_cast<table>(); |
3666 | 7.54k | tbl.is_inline(true); |
3667 | 7.54k | table_vector_scope table_scope{ open_inline_tables, tbl }; /* presumably keeps tbl listed in open_inline_tables for the lifetime of this scope - confirm against table_vector_scope */ |
3668 | | |
3669 | 7.54k | enum class TOML_CLOSED_ENUM parse_type : int |
3670 | 7.54k | { |
3671 | 7.54k | none, |
3672 | 7.54k | comma, |
3673 | 7.54k | kvp |
3674 | 7.54k | }; |
3675 | 7.54k | parse_type prev = parse_type::none; /* kind of the previously consumed token; drives the comma/kvp alternation checks */ |
3676 | 16.2k | while (!is_error()) |
3677 | 14.5k | { |
3678 | | if constexpr (TOML_LANG_UNRELEASED) // toml/issues/516 (newlines/trailing commas in inline tables) |
3679 | | { |
3680 | | while (consume_leading_whitespace() || consume_line_break() || consume_comment()) |
3681 | | continue; |
3682 | | } |
3683 | | else |
3684 | 14.5k | { |
3685 | 16.9k | while (consume_leading_whitespace()) |
3686 | 2.41k | continue; |
3687 | 14.5k | } |
3688 | 14.5k | return_if_error({}); |
3689 | 14.5k | set_error_and_return_if_eof({}); |
3690 | | |
3691 | | // commas - only legal after a key-value pair |
3692 | 14.5k | if (*cp == U',') |
3693 | 899 | { |
3694 | 899 | if (prev == parse_type::kvp) |
3695 | 897 | { |
3696 | 897 | prev = parse_type::comma; |
3697 | 897 | advance_and_return_if_error_or_eof({}); |
3698 | 897 | } |
3699 | 2 | else |
3700 | 899 | set_error_and_return_default("expected key-value pair or closing '}', saw comma"sv); |
3701 | 899 | } |
3702 | | |
3703 | | // closing '}' |
3704 | 13.6k | else if (*cp == U'}') |
3705 | 5.83k | { |
3706 | | if constexpr (!TOML_LANG_UNRELEASED) // toml/issues/516 (newlines/trailing commas in inline tables) |
3707 | 5.83k | { |
3708 | 5.83k | if (prev == parse_type::comma) |
3709 | 1 | { |
3710 | 1 | set_error_and_return_default("expected key-value pair, saw closing '}' (dangling comma)"sv); |
3711 | 0 | continue; |
3712 | 1 | } |
3713 | 5.83k | } |
3714 | 5.83k | advance_and_return_if_error({}); |
3715 | 5.83k | break; |
3716 | 5.83k | } |
3717 | | |
3718 | | // key-value pair; two pairs in a row without a comma are an error |
3719 | 7.84k | else if (is_string_delimiter(*cp) || is_bare_key_character(*cp)) |
3720 | 7.81k | { |
3721 | 7.81k | if (prev == parse_type::kvp) |
3722 | 7.81k | set_error_and_return_default("expected comma or closing '}', saw '"sv, to_sv(*cp), "'"sv); |
3723 | 7.80k | else |
3724 | 7.80k | { |
3725 | 7.80k | prev = parse_type::kvp; |
3726 | 7.80k | parse_key_value_pair_and_insert(&tbl); |
3727 | 7.80k | } |
3728 | 7.81k | } |
3729 | | |
3730 | | /// anything else cannot start a key and is not a comma or closing brace |
3731 | 31 | else |
3732 | 31 | set_error_and_return_default("expected key or closing '}', saw '"sv, to_sv(*cp), "'"sv); |
3733 | 14.5k | } |
3734 | | |
3735 | 7.50k | return_if_error({}); |
3736 | 7.50k | return tbl_ptr; |
3737 | 7.54k | } |
3738 | | |
3739 | | TOML_ABI_NAMESPACE_END; // TOML_EXCEPTIONS |
3740 | | } |
3741 | | TOML_IMPL_NAMESPACE_END; |
3742 | | |
3743 | | #undef TOML_RETURNS_BY_THROWING |
3744 | | #undef advance_and_return_if_error |
3745 | | #undef advance_and_return_if_error_or_eof |
3746 | | #undef assert_not_eof |
3747 | | #undef assert_not_error |
3748 | | #undef is_eof |
3749 | | #undef is_error |
3750 | | #undef parse_error_break |
3751 | | #undef push_parse_scope |
3752 | | #undef push_parse_scope_1 |
3753 | | #undef push_parse_scope_2 |
3754 | | #undef return_after_error |
3755 | | #undef return_if_eof |
3756 | | #undef return_if_error |
3757 | | #undef return_if_error_or_eof |
3758 | | #undef set_error_and_return |
3759 | | #undef set_error_and_return_default |
3760 | | #undef set_error_and_return_if_eof |
3761 | | #undef utf8_buffered_reader_error_check |
3762 | | #undef utf8_reader_error |
3763 | | #undef utf8_reader_error_check |
3764 | | #undef utf8_reader_return_after_error |
3765 | | |
3766 | | //#--------------------------------------------------------------------------------------------------------------------- |
3767 | | //# PARSER PUBLIC IMPLEMENTATION |
3768 | | //#--------------------------------------------------------------------------------------------------------------------- |
3769 | | |
3770 | | TOML_ANON_NAMESPACE_START |
3771 | | { |
3772 | | TOML_NODISCARD |
3773 | | TOML_INTERNAL_LINKAGE |
3774 | | parse_result do_parse(utf8_reader_interface && reader) |
3775 | 6.35k | { |
3776 | 6.35k | return impl::parser{ std::move(reader) }; |
3777 | 6.35k | } |
3778 | | |
3779 | | TOML_NODISCARD |
3780 | | TOML_INTERNAL_LINKAGE |
3781 | | parse_result do_parse_file(std::string_view file_path) |
3782 | 0 | { |
3783 | 0 | #if TOML_EXCEPTIONS |
3784 | 0 | #define TOML_PARSE_FILE_ERROR(msg, path) \ |
3785 | 0 | throw parse_error(msg, source_position{}, std::make_shared<const std::string>(std::move(path))) |
3786 | 0 | #else |
3787 | 0 | #define TOML_PARSE_FILE_ERROR(msg, path) \ |
3788 | 0 | return parse_result(parse_error(msg, source_position{}, std::make_shared<const std::string>(std::move(path)))) |
3789 | 0 | #endif |
3790 | 0 |
|
3791 | 0 | std::string file_path_str(file_path); |
3792 | 0 |
|
3793 | 0 | // open file with a custom-sized stack buffer |
3794 | 0 | std::ifstream file; |
3795 | 0 | TOML_OVERALIGNED char file_buffer[sizeof(void*) * 1024u]; |
3796 | 0 | file.rdbuf()->pubsetbuf(file_buffer, sizeof(file_buffer)); |
3797 | 0 | #if TOML_WINDOWS && !(defined(__MINGW32__) || defined(__MINGW64__)) |
3798 | 0 | file.open(impl::widen(file_path_str).c_str(), std::ifstream::in | std::ifstream::binary | std::ifstream::ate); |
3799 | 0 | #else |
3800 | 0 | file.open(file_path_str, std::ifstream::in | std::ifstream::binary | std::ifstream::ate); |
3801 | 0 | #endif |
3802 | 0 | if (!file.is_open()) |
3803 | 0 | TOML_PARSE_FILE_ERROR("File could not be opened for reading", file_path_str); |
3804 | 0 |
|
3805 | 0 | // get size |
3806 | 0 | const auto file_size = file.tellg(); |
3807 | 0 | if (file_size == -1) |
3808 | 0 | TOML_PARSE_FILE_ERROR("Could not determine file size", file_path_str); |
3809 | 0 | file.seekg(0, std::ifstream::beg); |
3810 | 0 |
|
3811 | 0 | // read the whole file into memory first if the file isn't too large |
3812 | 0 | constexpr auto large_file_threshold = 1024 * 1024 * 2; // 2 MB |
3813 | 0 | if (file_size <= large_file_threshold) |
3814 | 0 | { |
3815 | 0 | std::vector<char> file_data; |
3816 | 0 | file_data.resize(static_cast<size_t>(file_size)); |
3817 | 0 | file.read(file_data.data(), static_cast<std::streamsize>(file_size)); |
3818 | 0 | return parse(std::string_view{ file_data.data(), file_data.size() }, std::move(file_path_str)); |
3819 | 0 | } |
3820 | 0 |
|
3821 | 0 | // otherwise parse it using the streams |
3822 | 0 | else |
3823 | 0 | return parse(file, std::move(file_path_str)); |
3824 | 0 |
|
3825 | 0 | #undef TOML_PARSE_FILE_ERROR |
3826 | 0 | } |
3827 | | } |
3828 | | TOML_ANON_NAMESPACE_END; |
3829 | | |
3830 | | TOML_NAMESPACE_START |
3831 | | { |
3832 | | TOML_ABI_NAMESPACE_BOOL(TOML_EXCEPTIONS, ex, noex); |
3833 | | |
3834 | | TOML_EXTERNAL_LINKAGE |
3835 | | parse_result TOML_CALLCONV parse(std::string_view doc, std::string_view source_path) |
3836 | 6.35k | { |
3837 | 6.35k | return TOML_ANON_NAMESPACE::do_parse(TOML_ANON_NAMESPACE::utf8_reader{ doc, source_path }); |
3838 | 6.35k | } |
3839 | | |
3840 | | TOML_EXTERNAL_LINKAGE |
3841 | | parse_result TOML_CALLCONV parse(std::string_view doc, std::string && source_path) |
3842 | 0 | { |
3843 | 0 | return TOML_ANON_NAMESPACE::do_parse(TOML_ANON_NAMESPACE::utf8_reader{ doc, std::move(source_path) }); |
3844 | 0 | } |
3845 | | |
3846 | | TOML_EXTERNAL_LINKAGE |
3847 | | parse_result TOML_CALLCONV parse(std::istream & doc, std::string_view source_path) |
3848 | 0 | { |
3849 | 0 | return TOML_ANON_NAMESPACE::do_parse(TOML_ANON_NAMESPACE::utf8_reader{ doc, source_path }); |
3850 | 0 | } |
3851 | | |
3852 | | TOML_EXTERNAL_LINKAGE |
3853 | | parse_result TOML_CALLCONV parse(std::istream & doc, std::string && source_path) |
3854 | 0 | { |
3855 | 0 | return TOML_ANON_NAMESPACE::do_parse(TOML_ANON_NAMESPACE::utf8_reader{ doc, std::move(source_path) }); |
3856 | 0 | } |
3857 | | |
3858 | | TOML_EXTERNAL_LINKAGE |
3859 | | parse_result TOML_CALLCONV parse_file(std::string_view file_path) |
3860 | 0 | { |
3861 | 0 | return TOML_ANON_NAMESPACE::do_parse_file(file_path); |
3862 | 0 | } |
3863 | | |
3864 | | #if TOML_HAS_CHAR8 |
3865 | | |
3866 | | TOML_EXTERNAL_LINKAGE |
3867 | | parse_result TOML_CALLCONV parse(std::u8string_view doc, std::string_view source_path) |
3868 | | { |
3869 | | return TOML_ANON_NAMESPACE::do_parse(TOML_ANON_NAMESPACE::utf8_reader{ doc, source_path }); |
3870 | | } |
3871 | | |
3872 | | TOML_EXTERNAL_LINKAGE |
3873 | | parse_result TOML_CALLCONV parse(std::u8string_view doc, std::string && source_path) |
3874 | | { |
3875 | | return TOML_ANON_NAMESPACE::do_parse(TOML_ANON_NAMESPACE::utf8_reader{ doc, std::move(source_path) }); |
3876 | | } |
3877 | | |
3878 | | TOML_EXTERNAL_LINKAGE |
3879 | | parse_result TOML_CALLCONV parse_file(std::u8string_view file_path) |
3880 | | { |
3881 | | std::string file_path_str; |
3882 | | file_path_str.resize(file_path.length()); |
3883 | | memcpy(file_path_str.data(), file_path.data(), file_path.length()); |
3884 | | return TOML_ANON_NAMESPACE::do_parse_file(file_path_str); |
3885 | | } |
3886 | | |
3887 | | #endif // TOML_HAS_CHAR8 |
3888 | | |
3889 | | #if TOML_ENABLE_WINDOWS_COMPAT |
3890 | | |
3891 | | TOML_EXTERNAL_LINKAGE |
3892 | | parse_result TOML_CALLCONV parse(std::string_view doc, std::wstring_view source_path) |
3893 | | { |
3894 | | return TOML_ANON_NAMESPACE::do_parse(TOML_ANON_NAMESPACE::utf8_reader{ doc, impl::narrow(source_path) }); |
3895 | | } |
3896 | | |
3897 | | TOML_EXTERNAL_LINKAGE |
3898 | | parse_result TOML_CALLCONV parse(std::istream & doc, std::wstring_view source_path) |
3899 | | { |
3900 | | return TOML_ANON_NAMESPACE::do_parse(TOML_ANON_NAMESPACE::utf8_reader{ doc, impl::narrow(source_path) }); |
3901 | | } |
3902 | | |
3903 | | TOML_EXTERNAL_LINKAGE |
3904 | | parse_result TOML_CALLCONV parse_file(std::wstring_view file_path) |
3905 | | { |
3906 | | return TOML_ANON_NAMESPACE::do_parse_file(impl::narrow(file_path)); |
3907 | | } |
3908 | | |
3909 | | #endif // TOML_ENABLE_WINDOWS_COMPAT |
3910 | | |
3911 | | #if TOML_HAS_CHAR8 && TOML_ENABLE_WINDOWS_COMPAT |
3912 | | |
3913 | | TOML_EXTERNAL_LINKAGE |
3914 | | parse_result TOML_CALLCONV parse(std::u8string_view doc, std::wstring_view source_path) |
3915 | | { |
3916 | | return TOML_ANON_NAMESPACE::do_parse(TOML_ANON_NAMESPACE::utf8_reader{ doc, impl::narrow(source_path) }); |
3917 | | } |
3918 | | |
3919 | | #endif // TOML_HAS_CHAR8 && TOML_ENABLE_WINDOWS_COMPAT |
3920 | | |
3921 | | TOML_ABI_NAMESPACE_END; // TOML_EXCEPTIONS |
3922 | | } |
3923 | | TOML_NAMESPACE_END; |
3924 | | |
3925 | | #undef TOML_OVERALIGNED |
3926 | | #include "header_end.hpp" |
3927 | | #endif // TOML_ENABLE_PARSER |