Coverage Report

Created: 2025-06-22 07:08

/src/jsoncons/include/jsoncons_ext/csv/csv_parser.hpp
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2013-2025 Daniel Parker
2
// Distributed under the Boost license, Version 1.0.
3
// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
4
5
// See https://github.com/danielaparker/jsoncons for latest version
6
7
#ifndef JSONCONS_CSV_CSV_PARSER_HPP
8
#define JSONCONS_CSV_CSV_PARSER_HPP
9
10
#include <cctype>
#include <cmath> // std::nan
#include <cstddef>
#include <cstdint>
#include <functional>
#include <limits> // std::numeric_limits
#include <memory> // std::allocator
#include <sstream>
#include <string>
#include <system_error>
#include <utility> // std::move, std::pair
#include <vector>
19
20
#include <jsoncons/config/compiler_support.hpp>
21
#include <jsoncons/config/jsoncons_config.hpp>
22
#include <jsoncons/utility/read_number.hpp>
23
#include <jsoncons/json_exception.hpp>
24
#include <jsoncons/json_filter.hpp>
25
#include <jsoncons/json_reader.hpp>
26
#include <jsoncons/json_type.hpp>
27
#include <jsoncons/json_visitor.hpp>
28
#include <jsoncons/semantic_tag.hpp>
29
#include <jsoncons/ser_context.hpp>
30
#include <jsoncons/staj_event.hpp>
31
32
#include <jsoncons_ext/csv/csv_error.hpp>
33
#include <jsoncons_ext/csv/csv_options.hpp>
34
35
namespace jsoncons { 
36
namespace csv {
37
38
// Parsing modes kept on basic_csv_parser's mode stack (`stack_`).
// NOTE(review): initial/header/data presumably track which part of the input
// is being read; the code that pushes them is outside this view - confirm.
enum class csv_mode 
39
{
40
    initial,
41
    header,
42
    data,
43
    // When this mode is on top of the stack, parse_some pops it and calls
    // end_array() on the visitor before moving on to the next column.
    subfields
44
};
45
46
// States of the basic_csv_parser state machine. The current state is held in
// `state_`; `state_stack_` is a vector of the same type.
enum class csv_parse_state 
47
{
48
    // State set by the constructor and by reinitialize(). parse_some reports
    // csv_errc::source_error if the input is exhausted while still in it.
    start,
49
    cr, 
50
    expect_comment_or_record,
51
    expect_record,
52
    end_record,
53
    no_more_records,
54
    comment,
55
    between_values,
56
    quoted_string,
57
    unquoted_string,
58
    before_unquoted_string,
59
    escaped_value,
60
    minus, 
61
    zero,  
62
    integer,
63
    fraction,
64
    exp1,
65
    exp2,
66
    exp3,
67
    // accept() returns true in this state (and in `done`).
    accept,
68
    before_unquoted_field,
69
    before_unquoted_field_tail, 
70
    before_unquoted_field_tail1,
71
    before_last_unquoted_field,
72
    before_last_unquoted_field_tail,
73
    before_unquoted_subfield,
74
    before_unquoted_subfield_tail,
75
    before_quoted_subfield,
76
    before_quoted_subfield_tail,
77
    before_quoted_field,
78
    before_quoted_field_tail,
79
    before_last_quoted_field,
80
    before_last_quoted_field_tail,
81
    // done() returns true only in this state.
    done
82
};
83
84
// Replay position of detail::m_columns_filter::replay_parse_events. Each value
// names the event that the replay loop handles next.
enum class cached_state
85
{
86
    // Emit begin_object, then continue with `name`.
    begin_object,
87
    // Emit end_object, then continue with `done`.
    end_object,
88
    // Emit begin_array for the current column, then continue with `item`.
    begin_array,
89
    // Emit end_array, advance to the next column, then continue with `name`.
    end_array,
90
    // Emit the current column's key, or switch to `end_object` when no columns remain.
    name,
91
    // Replay the next cached event of the current column, or switch to `end_array`.
    item,
92
    // Nothing left to replay; m_columns_filter::done() returns true.
    done
93
};
94
95
struct default_csv_parsing
96
{
97
    bool operator()(csv_errc, const ser_context&) noexcept
98
0
    {
99
0
        return false;
100
0
    }
101
};
102
103
namespace detail {
104
105
    template <typename CharT,typename TempAllocator >
106
    class parse_event
107
    {
108
        using temp_allocator_type = TempAllocator;
109
        using string_view_type = typename basic_json_visitor<CharT>::string_view_type;
110
        using char_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<CharT>;
111
        using byte_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<uint8_t>;                  
112
        using string_type = std::basic_string<CharT,std::char_traits<CharT>,char_allocator_type>;
113
        using byte_string_type = basic_byte_string<byte_allocator_type>;
114
115
        staj_event_type event_type;
116
        string_type string_value;
117
        byte_string_type byte_string_value;
118
        union
119
        {
120
            bool bool_value;
121
            int64_t int64_value;
122
            uint64_t uint64_value;
123
            double double_value;
124
        };
125
        semantic_tag tag;
126
    public:
127
        parse_event(staj_event_type event_type, semantic_tag tag, const TempAllocator& alloc)
128
0
            : event_type(event_type), 
129
0
              string_value(alloc),
130
0
              byte_string_value(alloc),
131
0
              tag(tag)
132
0
        {
133
0
        }
134
135
        parse_event(const string_view_type& value, semantic_tag tag, const TempAllocator& alloc)
136
0
            : event_type(staj_event_type::string_value), 
137
0
              string_value(value.data(),value.length(),alloc), 
138
0
              byte_string_value(alloc),
139
0
              tag(tag)
140
0
        {
141
0
        }
142
143
        parse_event(const byte_string_view& value, semantic_tag tag, const TempAllocator& alloc)
144
0
            : event_type(staj_event_type::byte_string_value), 
145
0
              string_value(alloc),
146
0
              byte_string_value(value.data(),value.size(),alloc), 
147
0
              tag(tag)
148
0
        {
149
0
        }
150
151
        parse_event(bool value, semantic_tag tag, const TempAllocator& alloc)
152
0
            : event_type(staj_event_type::bool_value), 
153
0
              string_value(alloc),
154
0
              byte_string_value(alloc),
155
0
              bool_value(value), 
156
0
              tag(tag)
157
0
        {
158
0
        }
159
160
        parse_event(int64_t value, semantic_tag tag, const TempAllocator& alloc)
161
0
            : event_type(staj_event_type::int64_value), 
162
0
              string_value(alloc),
163
0
              byte_string_value(alloc),
164
0
              int64_value(value), 
165
0
              tag(tag)
166
0
        {
167
0
        }
168
169
        parse_event(uint64_t value, semantic_tag tag, const TempAllocator& alloc)
170
0
            : event_type(staj_event_type::uint64_value), 
171
0
              string_value(alloc),
172
0
              byte_string_value(alloc),
173
0
              uint64_value(value), 
174
0
              tag(tag)
175
0
        {
176
0
        }
177
178
        parse_event(double value, semantic_tag tag, const TempAllocator& alloc)
179
0
            : event_type(staj_event_type::double_value), 
180
0
              string_value(alloc),
181
0
              byte_string_value(alloc),
182
0
              double_value(value), 
183
0
              tag(tag)
184
0
        {
185
0
        }
186
187
        parse_event(const parse_event&) = default;
188
0
        parse_event(parse_event&&) = default;
189
        parse_event& operator=(const parse_event&) = default;
190
        parse_event& operator=(parse_event&&) = default;
191
192
        void replay(basic_json_visitor<CharT>& visitor) const
193
0
        {
194
0
            switch (event_type)
195
0
            {
196
0
                case staj_event_type::begin_array:
197
0
                    visitor.begin_array(tag, ser_context());
198
0
                    break;
199
0
                case staj_event_type::end_array:
200
0
                    visitor.end_array(ser_context());
201
0
                    break;
202
0
                case staj_event_type::string_value:
203
0
                    visitor.string_value(string_value, tag, ser_context());
204
0
                    break;
205
0
                case staj_event_type::byte_string_value:
206
0
                    visitor.byte_string_value(byte_string_value, tag, ser_context());
207
0
                    break;
208
0
                case staj_event_type::null_value:
209
0
                    visitor.null_value(tag, ser_context());
210
0
                    break;
211
0
                case staj_event_type::bool_value:
212
0
                    visitor.bool_value(bool_value, tag, ser_context());
213
0
                    break;
214
0
                case staj_event_type::int64_value:
215
0
                    visitor.int64_value(int64_value, tag, ser_context());
216
0
                    break;
217
0
                case staj_event_type::uint64_value:
218
0
                    visitor.uint64_value(uint64_value, tag, ser_context());
219
0
                    break;
220
0
                case staj_event_type::double_value:
221
0
                    visitor.double_value(double_value, tag, ser_context());
222
0
                    break;
223
0
                default:
224
0
                    break;
225
0
            }
226
0
        }
227
    };
228
229
    template <typename CharT,typename TempAllocator >
230
    class m_columns_filter : public basic_json_visitor<CharT>
231
    {
232
    public:
233
        using string_view_type = typename basic_json_visitor<CharT>::string_view_type;
234
        using char_type = CharT;
235
        using temp_allocator_type = TempAllocator;
236
237
        using char_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<CharT>;
238
        using string_type = std::basic_string<CharT,std::char_traits<CharT>,char_allocator_type>;
239
240
        using string_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<string_type>;
241
        using parse_event_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<parse_event<CharT,TempAllocator>>;
242
        using parse_event_vector_type = std::vector<parse_event<CharT,TempAllocator>, parse_event_allocator_type>;
243
        using parse_event_vector_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<parse_event_vector_type>;
244
    private:
245
        TempAllocator alloc_;
246
        std::size_t name_index_{0};
247
        int level_{0};
248
        int level2_{0};
249
        cached_state state_{cached_state::begin_object};
250
        std::size_t column_index_{0};
251
        std::size_t row_index_{0};
252
253
        std::vector<string_type, string_allocator_type> column_names_;
254
        std::vector<parse_event_vector_type,parse_event_vector_allocator_type> cached_events_;
255
    public:
256
257
        m_columns_filter(const TempAllocator& alloc)
258
6.29k
            : alloc_(alloc),
259
6.29k
              column_names_(alloc),
260
6.29k
              cached_events_(alloc)
261
6.29k
        {
262
6.29k
        }
263
264
        void reset()
265
        {
266
            name_index_ = 0;
267
            level_ = 0;
268
            level2_ = 0;
269
            state_ = cached_state::begin_object;
270
            column_index_ = 0;
271
            row_index_ = 0;
272
            column_names_.clear();
273
            cached_events_.clear();
274
        }
275
276
        bool done() const
277
0
        {
278
0
            return state_ == cached_state::done;
279
0
        }
280
281
        void initialize(const std::vector<string_type, string_allocator_type>& column_names)
282
0
        {
283
0
            for (const auto& name : column_names)
284
0
            {
285
0
                column_names_.push_back(name);
286
0
                cached_events_.emplace_back(alloc_);
287
0
            }
288
0
            name_index_ = 0;
289
0
            level_ = 0;
290
0
            level2_ = 0;
291
0
            column_index_ = 0;
292
0
            row_index_ = 0;
293
0
            state_ = cached_state::begin_object;
294
0
        }
295
296
        void skip_column()
297
0
        {
298
0
            ++name_index_;
299
0
        }
300
        
301
        int level() const
302
        {
303
            return static_cast<int>(level_);
304
        }
305
306
        bool replay_parse_events(basic_json_visitor<CharT>& visitor, bool cursor_mode, int mark_level)
307
0
        {
308
0
            bool more = true;
309
0
            while (more)
310
0
            {
311
0
                switch (state_)
312
0
                {
313
0
                    case cached_state::begin_object:
314
0
                        visitor.begin_object(semantic_tag::none, ser_context());
315
0
                        ++level_;
316
0
                        more = !cursor_mode;
317
0
                        column_index_ = 0;
318
0
                        state_ = cached_state::name;
319
0
                        break;
320
0
                    case cached_state::end_object:
321
0
                        visitor.end_object(ser_context());
322
0
                        more = !cursor_mode;
323
0
                        if (level_ == mark_level)
324
0
                        {
325
0
                            more = false;
326
0
                        }
327
0
                        --level_;
328
0
                        state_ = cached_state::done;
329
0
                        break;
330
0
                    case cached_state::name:
331
0
                        if (column_index_ < column_names_.size())
332
0
                        {
333
0
                            visitor.key(column_names_[column_index_], ser_context());
334
0
                            more = !cursor_mode;
335
0
                            state_ = cached_state::begin_array;
336
0
                        }
337
0
                        else
338
0
                        {
339
0
                            state_ = cached_state::end_object;
340
0
                        }
341
0
                        break;
342
0
                    case cached_state::begin_array:
343
0
                        visitor.begin_array(semantic_tag::none, ser_context());
344
0
                        ++level_;
345
0
                        more = !cursor_mode;
346
0
                        row_index_ = 0;
347
0
                        state_ = cached_state::item;
348
0
                        break;
349
0
                    case cached_state::end_array:
350
0
                        visitor.end_array(ser_context());
351
0
                        more = !cursor_mode;
352
0
                        if (level_ == mark_level)
353
0
                        {
354
0
                            more = false;
355
0
                        }
356
0
                        --level_;
357
0
                        ++column_index_;
358
0
                        state_ = cached_state::name;
359
0
                        break;
360
0
                    case cached_state::item:
361
0
                        if (row_index_ < cached_events_[column_index_].size())
362
0
                        {
363
0
                            cached_events_[column_index_][row_index_].replay(visitor);
364
0
                            more = !cursor_mode;
365
0
                            ++row_index_;
366
0
                        }
367
0
                        else
368
0
                        {
369
0
                            state_ = cached_state::end_array;
370
0
                        }
371
0
                        break;
372
0
                    default:
373
0
                        more = false;
374
0
                        break;
375
0
                }
376
0
            }
377
0
            return more;
378
0
        }
379
380
        void visit_flush() override
381
0
        {
382
0
        }
383
384
        JSONCONS_VISITOR_RETURN_TYPE visit_begin_object(semantic_tag, const ser_context&, std::error_code& ec) override
385
0
        {
386
0
            ec = csv_errc::invalid_parse_state;
387
0
            JSONCONS_VISITOR_RETURN;
388
0
        }
389
390
        JSONCONS_VISITOR_RETURN_TYPE visit_end_object(const ser_context&, std::error_code& ec) override
391
0
        {
392
0
            ec = csv_errc::invalid_parse_state;
393
0
            JSONCONS_VISITOR_RETURN;
394
0
        }
395
396
        JSONCONS_VISITOR_RETURN_TYPE visit_begin_array(semantic_tag tag, const ser_context&, std::error_code&) override
397
0
        {
398
0
            if (name_index_ < column_names_.size())
399
0
            {
400
0
                cached_events_[name_index_].emplace_back(staj_event_type::begin_array, tag, alloc_);
401
                
402
0
                ++level2_;
403
0
            }
404
0
            JSONCONS_VISITOR_RETURN;
405
0
        }
406
407
        JSONCONS_VISITOR_RETURN_TYPE visit_end_array(const ser_context&, std::error_code&) override
408
0
        {
409
0
            if (level2_ > 0)
410
0
            {
411
0
                cached_events_[name_index_].emplace_back(staj_event_type::end_array, semantic_tag::none, alloc_);
412
0
                ++name_index_;
413
0
                --level2_;
414
0
            }
415
0
            else
416
0
            {
417
0
                name_index_ = 0;
418
0
            }
419
0
            JSONCONS_VISITOR_RETURN;
420
0
        }
421
422
        JSONCONS_VISITOR_RETURN_TYPE visit_key(const string_view_type&, const ser_context&, std::error_code& ec) override
423
0
        {
424
0
            ec = csv_errc::invalid_parse_state;
425
0
            JSONCONS_VISITOR_RETURN;
426
0
        }
427
428
        JSONCONS_VISITOR_RETURN_TYPE visit_null(semantic_tag tag, const ser_context&, std::error_code&) override
429
0
        {
430
0
            if (name_index_ < column_names_.size())
431
0
            {
432
0
                cached_events_[name_index_].emplace_back(staj_event_type::null_value, tag, alloc_);
433
0
                if (level2_ == 0)
434
0
                {
435
0
                    ++name_index_;
436
0
                }
437
0
            }
438
0
            JSONCONS_VISITOR_RETURN;
439
0
        }
440
441
        JSONCONS_VISITOR_RETURN_TYPE visit_string(const string_view_type& value, semantic_tag tag, const ser_context&, std::error_code&) override
442
0
        {
443
0
            if (name_index_ < column_names_.size())
444
0
            {
445
0
                cached_events_[name_index_].emplace_back(value, tag, alloc_);
446
447
0
                if (level2_ == 0)
448
0
                {
449
0
                    ++name_index_;
450
0
                }
451
0
            }
452
0
            JSONCONS_VISITOR_RETURN;
453
0
        }
454
455
        JSONCONS_VISITOR_RETURN_TYPE visit_byte_string(const byte_string_view& value,
456
                                  semantic_tag tag,
457
                                  const ser_context&,
458
                                  std::error_code&) override
459
0
        {
460
0
            if (name_index_ < column_names_.size())
461
0
            {
462
0
                cached_events_[name_index_].emplace_back(value, tag, alloc_);
463
0
                if (level2_ == 0)
464
0
                {
465
0
                    ++name_index_;
466
0
                }
467
0
            }
468
0
            JSONCONS_VISITOR_RETURN;
469
0
        }
470
471
        JSONCONS_VISITOR_RETURN_TYPE visit_double(double value,
472
                             semantic_tag tag, 
473
                             const ser_context&,
474
                             std::error_code&) override
475
0
        {
476
0
            if (name_index_ < column_names_.size())
477
0
            {
478
0
                cached_events_[name_index_].emplace_back(value, tag, alloc_);
479
0
                if (level2_ == 0)
480
0
                {
481
0
                    ++name_index_;
482
0
                }
483
0
            }
484
0
            JSONCONS_VISITOR_RETURN;
485
0
        }
486
487
        JSONCONS_VISITOR_RETURN_TYPE visit_int64(int64_t value,
488
                            semantic_tag tag,
489
                            const ser_context&,
490
                            std::error_code&) override
491
0
        {
492
0
            if (name_index_ < column_names_.size())
493
0
            {
494
0
                cached_events_[name_index_].emplace_back(value, tag, alloc_);
495
0
                if (level2_ == 0)
496
0
                {
497
0
                    ++name_index_;
498
0
                }
499
0
            }
500
0
            JSONCONS_VISITOR_RETURN;
501
0
        }
502
503
        JSONCONS_VISITOR_RETURN_TYPE visit_uint64(uint64_t value,
504
                             semantic_tag tag,
505
                             const ser_context&,
506
                             std::error_code&) override
507
0
        {
508
0
            if (name_index_ < column_names_.size())
509
0
            {
510
0
                cached_events_[name_index_].emplace_back(value, tag, alloc_);
511
0
                if (level2_ == 0)
512
0
                {
513
0
                    ++name_index_;
514
0
                }
515
0
            }
516
0
            JSONCONS_VISITOR_RETURN;
517
0
        }
518
519
        JSONCONS_VISITOR_RETURN_TYPE visit_bool(bool value, semantic_tag tag, const ser_context&, std::error_code&) override
520
0
        {
521
0
            if (name_index_ < column_names_.size())
522
0
            {
523
0
                cached_events_[name_index_].emplace_back(value, tag, alloc_);
524
0
                if (level2_ == 0)
525
0
                {
526
0
                    ++name_index_;
527
0
                }
528
0
            }
529
0
            JSONCONS_VISITOR_RETURN;
530
0
        }
531
    };
532
533
} // namespace detail
534
535
template <typename CharT,typename TempAllocator =std::allocator<char>>
536
class basic_csv_parser : public ser_context
537
{
538
public:
539
    using string_view_type = jsoncons::basic_string_view<CharT>;
540
    using char_type = CharT;
541
private:
542
    struct string_maps_to_double
543
    {
544
        string_view_type s;
545
546
        bool operator()(const std::pair<string_view_type,double>& val) const
547
0
        {
548
0
            return val.first == s;
549
0
        }
550
    };
551
552
    using temp_allocator_type = TempAllocator;
553
    typedef typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<CharT> char_allocator_type;
554
    using string_type = std::basic_string<CharT,std::char_traits<CharT>,char_allocator_type>;
555
    typedef typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<string_type> string_allocator_type;
556
    typedef typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<csv_mode> csv_mode_allocator_type;
557
    typedef typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<csv_type_info> csv_type_info_allocator_type;
558
    typedef typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<std::vector<string_type,string_allocator_type>> string_vector_allocator_type;
559
    typedef typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<csv_parse_state> csv_parse_state_allocator_type;
560
561
    // Initial value of nesting_depth_; reinitialize() restores it.
    static constexpr int default_depth = 3;
562
563
    temp_allocator_type alloc_;
564
    csv_parse_state state_;
565
    std::function<bool(csv_errc,const ser_context&)> err_handler_;
566
    // Position reported when the throwing parse_some overload raises ser_error.
    std::size_t column_{1};
567
    std::size_t line_{1};
568
    int nesting_depth_{default_depth};
569
    // The members from assume_header_ through unquoted_empty_value_is_null_
    // are copied from basic_csv_decode_options by the constructor.
    bool assume_header_;
570
    char_type comment_starter_;
571
    char_type field_delimiter_;
572
    std::size_t header_lines_;
573
    bool ignore_empty_values_;
574
    bool ignore_empty_lines_;
575
    bool infer_types_;
576
    bool lossless_number_; 
577
    csv_mapping_kind mapping_kind_;
578
    std::size_t max_lines_;
579
    char_type quote_char_;
580
    char_type quote_escape_char_;
581
    char_type subfield_delimiter_;
582
    bool trim_leading_;
583
    bool trim_leading_inside_quotes_;
584
    bool trim_trailing_;
585
    bool trim_trailing_inside_quotes_;
586
    bool unquoted_empty_value_is_null_;
587
    // Number of column names supplied through the options; reinitialize()
    // truncates column_names_ back to this size.
    std::size_t min_column_names_{0};
588
    std::size_t column_index_{0};
589
    int level_{0};
590
    std::size_t depth_{0};
591
    std::size_t offset_{0};
592
    // Input pointers; input_ptr_ == input_end_ means the current input is
    // exhausted (see source_exhausted()).
    const CharT* begin_input_{nullptr};
593
    const CharT* input_end_{nullptr};
594
    const CharT* input_ptr_{nullptr};
595
    // stopped() returns !more_; restart() sets it back to true.
    bool more_{true};
596
    std::size_t header_line_{1};
597
    // cursor_mode_ is forced to false for the m_columns mapping, while
    // actual_cursor_mode_ keeps the value passed to cursor_mode(bool).
    bool cursor_mode_{false};
598
    bool actual_cursor_mode_{false};
599
    int mark_level_{0};
600
    std::size_t header_line_offset_{0};
601
602
    // Receives the parse events instead of the caller's visitor when
    // mapping_kind_ is csv_mapping_kind::m_columns (see parse_some).
    detail::m_columns_filter<CharT,TempAllocator> m_columns_filter_;
603
    std::vector<csv_mode,csv_mode_allocator_type> stack_;
604
    std::vector<string_type,string_allocator_type> column_names_;
605
    std::vector<csv_type_info,csv_type_info_allocator_type> column_types_;
606
    std::vector<string_type,string_allocator_type> column_defaults_;
607
    std::vector<csv_parse_state,csv_parse_state_allocator_type> state_stack_;
608
    string_type buffer_;
609
    // String spellings mapped to NaN, +infinity and -infinity; filled in by
    // the constructor when the corresponding options are enabled.
    std::vector<std::pair<std::basic_string<char_type>,double>> string_double_map_;
610
611
public:
612
    basic_csv_parser(const TempAllocator& alloc = TempAllocator())
613
       : basic_csv_parser(basic_csv_decode_options<CharT>(), 
614
                          default_csv_parsing(),
615
                          alloc)
616
    {
617
    }
618
619
    basic_csv_parser(const basic_csv_decode_options<CharT>& options,
620
                     const TempAllocator& alloc = TempAllocator())
621
        : basic_csv_parser(options, 
622
                           default_csv_parsing(),
623
                           alloc)
624
    {
625
    }
626
627
    // Parser with default-constructed decode options and a caller-supplied
    // error handler. Delegates to the primary constructor.
    basic_csv_parser(std::function<bool(csv_errc,const ser_context&)> err_handler,
                     const TempAllocator& alloc = TempAllocator())
        : basic_csv_parser(basic_csv_decode_options<CharT>(), 
                           std::move(err_handler), // by-value sink parameter: hand it on without another copy
                           alloc)
    {
    }
634
635
    basic_csv_parser(const basic_csv_decode_options<CharT>& options,
636
                     std::function<bool(csv_errc,const ser_context&)> err_handler,
637
                     const TempAllocator& alloc = TempAllocator())
638
6.29k
       : alloc_(alloc),
639
6.29k
         state_(csv_parse_state::start),
640
6.29k
         err_handler_(err_handler),
641
6.29k
         assume_header_(options.assume_header()),                  
642
6.29k
         comment_starter_(options.comment_starter()),
643
6.29k
         field_delimiter_(options.field_delimiter()),
644
6.29k
         header_lines_(options.header_lines()),
645
6.29k
         ignore_empty_values_(options.ignore_empty_values()),
646
6.29k
         ignore_empty_lines_(options.ignore_empty_lines()),
647
6.29k
         infer_types_(options.infer_types()),
648
6.29k
         lossless_number_(options.lossless_number()), 
649
6.29k
         mapping_kind_(options.mapping_kind()),
650
6.29k
         max_lines_(options.max_lines()),
651
6.29k
         quote_char_(options.quote_char()),
652
6.29k
         quote_escape_char_(options.quote_escape_char()),
653
6.29k
         subfield_delimiter_(options.subfield_delimiter()),
654
6.29k
         trim_leading_(options.trim_leading()),
655
6.29k
         trim_leading_inside_quotes_(options.trim_leading_inside_quotes()),
656
6.29k
         trim_trailing_(options.trim_trailing()),
657
6.29k
         trim_trailing_inside_quotes_(options.trim_trailing_inside_quotes()),
658
6.29k
         unquoted_empty_value_is_null_(options.unquoted_empty_value_is_null()),
659
6.29k
         m_columns_filter_(alloc),
660
6.29k
         stack_(alloc),
661
6.29k
         column_names_(alloc),
662
6.29k
         column_types_(alloc),
663
6.29k
         column_defaults_(alloc),
664
6.29k
         state_stack_(alloc),
665
6.29k
         buffer_(alloc)
666
6.29k
    {
667
6.29k
        if (options.enable_str_to_nan())
668
0
        {
669
0
            string_double_map_.emplace_back(options.nan_to_str(),std::nan(""));
670
0
        }
671
6.29k
        if (options.enable_str_to_inf())
672
0
        {
673
0
            string_double_map_.emplace_back(options.inf_to_str(),std::numeric_limits<double>::infinity());
674
0
        }
675
6.29k
        if (options.enable_str_to_neginf())
676
0
        {
677
0
            string_double_map_.emplace_back(options.neginf_to_str(),-std::numeric_limits<double>::infinity());
678
0
        }
679
680
6.29k
        jsoncons::csv::detail::parse_column_types(options.column_types(), column_types_);
681
6.29k
        jsoncons::csv::detail::parse_column_names(options.column_defaults(), column_defaults_);
682
6.29k
        jsoncons::csv::detail::parse_column_names(options.column_names(), column_names_);
683
6.29k
        min_column_names_ = column_names_.size();
684
6.29k
        initialize();
685
6.29k
    }
686
687
    // Nothing to release by hand; every member cleans up after itself.
    ~basic_csv_parser() noexcept = default;
690
691
    void cursor_mode(bool value)
692
    {
693
        actual_cursor_mode_ = value;
694
        cursor_mode_ = (mapping_kind_ == csv_mapping_kind::m_columns) ? false : value;
695
    }
696
697
    int level() const
698
6.97k
    {
699
6.97k
        return level_;
700
6.97k
    }
701
702
    int mark_level() const 
703
    {
704
        return mark_level_;
705
    }
706
707
    void mark_level(int value)
708
    {
709
        mark_level_ = value;
710
    }
711
712
    bool done() const
713
    {
714
        return state_ == csv_parse_state::done;
715
    }
716
717
    bool accept() const
718
    {
719
        return state_ == csv_parse_state::accept || state_ == csv_parse_state::done;
720
    }
721
722
    bool stopped() const
723
50.8k
    {
724
50.8k
        return !more_;
725
50.8k
    }
726
727
    bool source_exhausted() const
728
44.6k
    {
729
44.6k
        return input_ptr_ == input_end_;
730
44.6k
    }
731
732
    const std::vector<string_type,string_allocator_type>& column_labels() const
733
    {
734
        return column_names_;
735
    }
736
737
    void reinitialize()
738
    {
739
        state_ = csv_parse_state::start;
740
        column_ = 1;
741
        line_ = 1;
742
        nesting_depth_ = default_depth;
743
        column_index_ = 0;
744
        level_ = 0;
745
        depth_ = 0;
746
        offset_ = 0;
747
        begin_input_ = nullptr;
748
        input_end_ = nullptr;
749
        input_ptr_ = nullptr;
750
        more_ = true;
751
        header_line_ = 1;
752
        m_columns_filter_.reset();
753
        stack_.clear();
754
        column_names_.erase(column_names_.begin() + min_column_names_, column_names_.end());
755
        state_stack_.clear();
756
        buffer_.clear();
757
758
        initialize();
759
    }
760
761
    void restart()
762
    {
763
        more_ = true;
764
    }
765
766
    // Throwing overload: runs the error_code overload and, if it reports an
    // error, raises ser_error carrying the current line and column.
    void parse_some(basic_json_visitor<CharT>& visitor)
    {
        std::error_code error;
        parse_some(visitor, error);
        if (JSONCONS_UNLIKELY(error))
        {
            JSONCONS_THROW(ser_error(error,line_,column_));
        }
    }
775
776
    // Runs the CSV state machine over the input range [input_ptr_, input_end_)
    // and reports parse events to `visitor`.
    //
    // visitor - receiver of the parse events. When mapping_kind_ is
    //           csv_mapping_kind::m_columns the events go to m_columns_filter_
    //           instead, and are replayed to `visitor` once the input is
    //           exhausted.
    // ec      - set to a csv_errc value on failure; more_ is cleared whenever
    //           this function itself raises an error.
    //
    // When called with no unread input while more_ is still set, one
    // end-of-input step is performed per call (finish the pending field, then
    // the record, then the enclosing array, then accept). Otherwise characters
    // are consumed until the input is exhausted or more_ becomes false.
    //
    // Line accounting: the state that recognises a line terminator advances
    // line_ and resets column_. The `cr` state only swallows the '\n' of a
    // "\r\n" pair, so a CR or CRLF terminator counts as exactly one line, the
    // same as a lone LF.
    void parse_some(basic_json_visitor<CharT>& visitor, std::error_code& ec)
    {
        basic_json_visitor<CharT>& local_visitor = (mapping_kind_ == csv_mapping_kind::m_columns) 
            ? m_columns_filter_ : visitor;

        if (mapping_kind_ == csv_mapping_kind::m_columns)
        {
            // Events are buffered by the filter, so never pause after each one.
            cursor_mode_ = false;
        }

        // Emits end_array and performs the bookkeeping that goes with it.
        auto emit_end_array = [&]()
        {
            local_visitor.end_array(*this, ec);
            more_ = !cursor_mode_;
            if (mapping_kind_ != csv_mapping_kind::m_columns)
            {
                if (level() == mark_level_)
                {
                    more_ = false;
                }
            }
            --level_;
        };

        // Closes the array opened for a field's subfields, if one is open.
        auto close_open_subfields = [&]()
        {
            if (stack_.back() == csv_mode::subfields)
            {
                stack_.pop_back();
                emit_end_array();
            }
        };

        const CharT* local_input_end = input_end_;

        // No unread input: advance the end-of-input sequence by one step.
        if (input_ptr_ == local_input_end && more_)
        {
            switch (state_)
            {
                case csv_parse_state::start:
                    ec = csv_errc::source_error;
                    more_ = false;
                    return;
                case csv_parse_state::before_unquoted_field:
                case csv_parse_state::before_last_unquoted_field:
                    end_unquoted_string_value(local_visitor, ec);
                    state_ = csv_parse_state::before_last_unquoted_field_tail;
                    break;
                case csv_parse_state::before_last_unquoted_field_tail:
                    close_open_subfields();
                    ++column_index_;
                    state_ = csv_parse_state::end_record;
                    break;
                case csv_parse_state::before_unquoted_string: 
                    buffer_.clear();
                    JSONCONS_FALLTHROUGH;
                case csv_parse_state::unquoted_string: 
                    if (trim_leading_ || trim_trailing_)
                    {
                        trim_string_buffer(trim_leading_,trim_trailing_);
                    }
                    if (ignore_empty_values_ && buffer_.empty())
                    {
                        state_ = csv_parse_state::end_record;
                    }
                    else
                    {
                        before_value(local_visitor, ec);
                        state_ = csv_parse_state::before_unquoted_field;
                    }
                    break;
                case csv_parse_state::before_last_quoted_field:
                    end_quoted_string_value(local_visitor, ec);
                    ++column_index_;
                    state_ = csv_parse_state::end_record;
                    break;
                case csv_parse_state::escaped_value:
                    // Only legal at end of input when the escape character is
                    // the quote character itself, i.e. it was a closing quote.
                    if (quote_escape_char_ == quote_char_)
                    {
                        if (!(ignore_empty_values_ && buffer_.empty()))
                        {
                            before_value(local_visitor, ec);
                            ++column_;
                            state_ = csv_parse_state::before_last_quoted_field;
                        }
                        else
                        {
                            state_ = csv_parse_state::end_record;
                        }
                    }
                    else
                    {
                        ec = csv_errc::invalid_escaped_char;
                        more_ = false;
                        return;
                    }
                    break;
                case csv_parse_state::end_record:
                    if (column_index_ > 0)
                    {
                        end_record(local_visitor, ec);
                    }
                    state_ = csv_parse_state::no_more_records;
                    break;
                case csv_parse_state::no_more_records: 
                    switch (stack_.back()) 
                    {
                        case csv_mode::header:
                        case csv_mode::data:
                            stack_.pop_back();
                            break;
                        default:
                            break;
                    }
                    emit_end_array();

                    if (mapping_kind_ == csv_mapping_kind::m_columns)
                    {
                        if (!m_columns_filter_.done())
                        {
                            more_ = m_columns_filter_.replay_parse_events(visitor, actual_cursor_mode_, mark_level_);
                        }
                        else
                        {
                            state_ = csv_parse_state::accept;
                        }
                    }
                    else
                    {
                        state_ = csv_parse_state::accept;
                    }
                    break;
                case csv_parse_state::accept:
                    if (!(stack_.size() == 1 && stack_.back() == csv_mode::initial))
                    {
                        err_handler_(csv_errc::unexpected_eof, *this);
                        ec = csv_errc::unexpected_eof;
                        more_ = false;
                        return;
                    }
                    stack_.pop_back();
                    local_visitor.flush();
                    state_ = csv_parse_state::done;
                    more_ = false;
                    return;
                default:
                    state_ = csv_parse_state::end_record;
                    break;
            }
        }

        while ((input_ptr_ < local_input_end) && more_)
        {
            CharT curr_char = *input_ptr_;

            switch (state_) 
            {
                case csv_parse_state::cr:
                    // The state that saw '\r' has already advanced line_ and
                    // reset column_; here we only absorb the '\n' of "\r\n".
                    if (curr_char == '\n')
                    {
                        ++input_ptr_;
                    }
                    state_ = pop_state();
                    break;
                case csv_parse_state::start:
                    if (mapping_kind_ != csv_mapping_kind::m_columns)
                    {
                        local_visitor.begin_array(semantic_tag::none, *this, ec);
                        more_ = !cursor_mode_;
                        ++level_;
                    }
                    if (assume_header_ && mapping_kind_ == csv_mapping_kind::n_rows && !column_names_.empty())
                    {
                        column_index_ = 0; 
                        local_visitor.begin_array(semantic_tag::none, *this, ec);
                        ++level_;
                        more_ = !cursor_mode_;
                        state_ = csv_parse_state::expect_comment_or_record;
                    }
                    else
                    {
                        state_ = csv_parse_state::expect_comment_or_record;
                    }
                    break;
                case csv_parse_state::comment: 
                    switch (curr_char)
                    {
                        case '\n':
                        {
                            ++line_;
                            if (stack_.back() == csv_mode::header)
                            {
                                ++header_line_offset_;
                            }
                            column_ = 1;
                            state_ = csv_parse_state::expect_comment_or_record;
                            break;
                        }
                        case '\r':
                            ++line_;
                            if (stack_.back() == csv_mode::header)
                            {
                                ++header_line_offset_;
                            }
                            column_ = 1;
                            state_ = csv_parse_state::expect_comment_or_record;
                            push_state(state_);
                            state_ = csv_parse_state::cr;
                            break;
                        default:
                            ++column_;
                            break;
                    }
                    ++input_ptr_;
                    break;
                
                case csv_parse_state::expect_comment_or_record:
                    buffer_.clear();
                    if (curr_char == comment_starter_)
                    {
                        state_ = csv_parse_state::comment;
                        ++column_;
                        ++input_ptr_;
                    }
                    else
                    {
                        // Not consumed: re-examined in expect_record.
                        state_ = csv_parse_state::expect_record;
                    }
                    break;
                case csv_parse_state::quoted_string: 
                    {
                        if (curr_char == quote_escape_char_)
                        {
                            state_ = csv_parse_state::escaped_value;
                        }
                        else if (curr_char == quote_char_)
                        {
                            state_ = csv_parse_state::between_values;
                        }
                        else
                        {
                            buffer_.push_back(static_cast<CharT>(curr_char));
                        }
                    }
                    ++column_;
                    ++input_ptr_;
                    break;
                case csv_parse_state::escaped_value: 
                    {
                        if (curr_char == quote_char_)
                        {
                            buffer_.push_back(static_cast<CharT>(curr_char));
                            state_ = csv_parse_state::quoted_string;
                            ++column_;
                            ++input_ptr_;
                        }
                        else if (quote_escape_char_ == quote_char_)
                        {
                            // The previous quote closed the string; the current
                            // character is re-examined in between_values.
                            state_ = csv_parse_state::between_values;
                        }
                        else
                        {
                            ec = csv_errc::invalid_escaped_char;
                            more_ = false;
                            return;
                        }
                    }
                    break;
                case csv_parse_state::between_values:
                    switch (curr_char)
                    {
                        case '\r':
                        case '\n':
                        {
                            if (trim_leading_ || trim_trailing_)
                            {
                                trim_string_buffer(trim_leading_,trim_trailing_);
                            }
                            if (!(ignore_empty_values_ && buffer_.empty()))
                            {
                                before_value(local_visitor, ec);
                                state_ = csv_parse_state::before_last_quoted_field;
                            }
                            else
                            {
                                state_ = csv_parse_state::end_record;
                            }
                            break;
                        }
                        default:
                            if (curr_char == field_delimiter_)
                            {
                                if (trim_leading_ || trim_trailing_)
                                {
                                    trim_string_buffer(trim_leading_,trim_trailing_);
                                }
                                before_value(local_visitor, ec);
                                state_ = csv_parse_state::before_quoted_field;
                            }
                            else if (subfield_delimiter_ != char_type() && curr_char == subfield_delimiter_)
                            {
                                if (trim_leading_ || trim_trailing_)
                                {
                                    trim_string_buffer(trim_leading_,trim_trailing_);
                                }
                                before_value(local_visitor, ec);
                                state_ = csv_parse_state::before_quoted_subfield;
                            }
                            else if (curr_char == ' ' || curr_char == '\t')
                            {
                                ++column_;
                                ++input_ptr_;
                            }
                            else
                            {
                                ec = csv_errc::unexpected_char_between_fields;
                                more_ = false;
                                return;
                            }
                            break;
                    }
                    break;
                case csv_parse_state::before_unquoted_string: 
                {
                    buffer_.clear();
                    state_ = csv_parse_state::unquoted_string;
                    break;
                }
                case csv_parse_state::before_unquoted_field:
                    end_unquoted_string_value(local_visitor, ec);
                    state_ = csv_parse_state::before_unquoted_field_tail;
                    break;
                case csv_parse_state::before_unquoted_field_tail:
                {
                    close_open_subfields();
                    ++column_index_;
                    state_ = csv_parse_state::before_unquoted_string;
                    ++column_;
                    ++input_ptr_;
                    break;
                }
                case csv_parse_state::before_unquoted_field_tail1:
                {
                    close_open_subfields();
                    state_ = csv_parse_state::end_record;
                    ++column_;
                    ++input_ptr_;
                    break;
                }

                case csv_parse_state::before_last_unquoted_field:
                    end_unquoted_string_value(local_visitor, ec);
                    state_ = csv_parse_state::before_last_unquoted_field_tail;
                    break;

                case csv_parse_state::before_last_unquoted_field_tail:
                    close_open_subfields();
                    ++column_index_;
                    state_ = csv_parse_state::end_record;
                    break;

                case csv_parse_state::before_unquoted_subfield:
                    if (stack_.back() == csv_mode::data)
                    {
                        stack_.push_back(csv_mode::subfields);
                        local_visitor.begin_array(semantic_tag::none, *this, ec);
                        more_ = !cursor_mode_;
                        ++level_;
                    }
                    state_ = csv_parse_state::before_unquoted_subfield_tail;
                    break; 
                case csv_parse_state::before_unquoted_subfield_tail:
                    end_unquoted_string_value(local_visitor, ec);
                    state_ = csv_parse_state::before_unquoted_string;
                    ++column_;
                    ++input_ptr_;
                    break;
                case csv_parse_state::before_quoted_field:
                    end_quoted_string_value(local_visitor, ec);
                    state_ = csv_parse_state::before_unquoted_field_tail; // return to unquoted
                    break;
                case csv_parse_state::before_quoted_subfield:
                    if (stack_.back() == csv_mode::data)
                    {
                        stack_.push_back(csv_mode::subfields);
                        local_visitor.begin_array(semantic_tag::none, *this, ec);
                        more_ = !cursor_mode_;
                        ++level_;
                    }
                    state_ = csv_parse_state::before_quoted_subfield_tail;
                    break; 
                case csv_parse_state::before_quoted_subfield_tail:
                    end_quoted_string_value(local_visitor, ec);
                    state_ = csv_parse_state::before_unquoted_string;
                    ++column_;
                    ++input_ptr_;
                    break;
                case csv_parse_state::before_last_quoted_field:
                    end_quoted_string_value(local_visitor, ec);
                    state_ = csv_parse_state::before_last_quoted_field_tail;
                    break;
                case csv_parse_state::before_last_quoted_field_tail:
                    close_open_subfields();
                    ++column_index_;
                    state_ = csv_parse_state::end_record;
                    break;
                case csv_parse_state::unquoted_string: 
                {
                    switch (curr_char)
                    {
                        case '\n':
                        case '\r':
                        {
                            if (trim_leading_ || trim_trailing_)
                            {
                                trim_string_buffer(trim_leading_,trim_trailing_);
                            }
                            if (!(ignore_empty_values_ && buffer_.empty()))
                            {
                                before_value(local_visitor, ec);
                                state_ = csv_parse_state::before_last_unquoted_field;
                            }
                            else
                            {
                                state_ = csv_parse_state::end_record;
                            }
                            break;
                        }
                        default:
                            if (curr_char == field_delimiter_)
                            {
                                if (trim_leading_ || trim_trailing_)
                                {
                                    trim_string_buffer(trim_leading_,trim_trailing_);
                                }
                                before_value(local_visitor, ec);
                                state_ = csv_parse_state::before_unquoted_field;
                            }
                            else if (subfield_delimiter_ != char_type() && curr_char == subfield_delimiter_)
                            {
                                if (trim_leading_ || trim_trailing_)
                                {
                                    trim_string_buffer(trim_leading_,trim_trailing_);
                                }
                                before_value(local_visitor, ec);
                                state_ = csv_parse_state::before_unquoted_subfield;
                            }
                            else if (curr_char == quote_char_)
                            {
                                buffer_.clear();
                                state_ = csv_parse_state::quoted_string;
                                ++column_;
                                ++input_ptr_;
                            }
                            else
                            {
                                buffer_.push_back(static_cast<CharT>(curr_char));
                                ++column_;
                                ++input_ptr_;
                            }
                            break;
                    }
                    break;
                }
                case csv_parse_state::expect_record: 
                {
                    switch (curr_char)
                    {
                        case '\n':
                        {
                            if (!ignore_empty_lines_)
                            {
                                begin_record(local_visitor, ec);
                                state_ = csv_parse_state::end_record;
                            }
                            else
                            {
                                ++line_;
                                column_ = 1;
                                state_ = csv_parse_state::expect_comment_or_record;
                                ++input_ptr_;
                            }
                            break;
                        }
                        case '\r':
                            if (!ignore_empty_lines_)
                            {
                                begin_record(local_visitor, ec);
                                state_ = csv_parse_state::end_record;
                            }
                            else
                            {
                                // Mirror the '\n' branch: count the skipped line
                                // here and resume where a comment can still be
                                // recognised on the following line.
                                ++line_;
                                column_ = 1;
                                ++input_ptr_;
                                push_state(csv_parse_state::expect_comment_or_record);
                                state_ = csv_parse_state::cr;
                            }
                            break;
                        case ' ':
                        case '\t':
                            if (!trim_leading_)
                            {
                                buffer_.push_back(static_cast<CharT>(curr_char));
                                begin_record(local_visitor, ec);
                                state_ = csv_parse_state::unquoted_string;
                            }
                            ++column_;
                            ++input_ptr_;
                            break;
                        default:
                            begin_record(local_visitor, ec);
                            if (curr_char == quote_char_)
                            {
                                buffer_.clear();
                                state_ = csv_parse_state::quoted_string;
                                ++column_;
                                ++input_ptr_;
                            }
                            else
                            {
                                // Not consumed: re-examined in unquoted_string.
                                state_ = csv_parse_state::unquoted_string;
                            }
                            break;
                    }
                    break;
                }
                case csv_parse_state::end_record: 
                {
                    switch (curr_char)
                    {
                        case '\n':
                        {
                            ++line_;
                            column_ = 1;
                            state_ = csv_parse_state::expect_comment_or_record;
                            end_record(local_visitor, ec);
                            ++input_ptr_;
                            break;
                        }
                        case '\r':
                            ++line_;
                            column_ = 1;
                            state_ = csv_parse_state::expect_comment_or_record;
                            end_record(local_visitor, ec);
                            push_state(state_);
                            state_ = csv_parse_state::cr;
                            ++input_ptr_;
                            break;
                        case ' ':
                        case '\t':
                            ++column_;
                            ++input_ptr_;
                            break;
                        default:
                            err_handler_(csv_errc::syntax_error, *this);
                            ec = csv_errc::syntax_error;
                            more_ = false;
                            return;
                    }
                    break;
                }
                default:
                    err_handler_(csv_errc::invalid_parse_state, *this);
                    ec = csv_errc::invalid_parse_state;
                    more_ = false;
                    return;
            }
            // Stop once the configured line limit has been passed.
            if (line_ > max_lines_)
            {
                state_ = csv_parse_state::done;
                more_ = false;
            }
        }
    }
1413
1414
    void finish_parse()
1415
    {
1416
        std::error_code ec;
1417
        finish_parse(ec);
1418
        if (JSONCONS_UNLIKELY(ec))
1419
        {
1420
            JSONCONS_THROW(ser_error(ec,line_,column_));
1421
        }
1422
    }
1423
1424
    void finish_parse(std::error_code& ec)
1425
    {
1426
        while (more_)
1427
        {
1428
            parse_some(ec);
1429
        }
1430
    }
1431
1432
    // Returns the state the parser's state machine is currently in.
    csv_parse_state state() const
    {
        const csv_parse_state current = state_;
        return current;
    }
1436
1437
    void update(const string_view_type sv)
1438
    {
1439
        update(sv.data(),sv.length());
1440
    }
1441
1442
    void update(const CharT* data, std::size_t length)
1443
8.92k
    {
1444
8.92k
        begin_input_ = data;
1445
8.92k
        input_end_ = data + length;
1446
8.92k
        input_ptr_ = begin_input_;
1447
8.92k
    }
1448
1449
    std::size_t line() const override
1450
92
    {
1451
92
        return line_;
1452
92
    }
1453
1454
    std::size_t column() const override
1455
92
    {
1456
92
        return column_;
1457
92
    }
1458
1459
private:
1460
    void initialize()
1461
6.29k
    {
1462
6.29k
        stack_.reserve(default_depth);
1463
6.29k
        stack_.push_back(csv_mode::initial);
1464
6.29k
        stack_.push_back((header_lines_ > 0) ? csv_mode::header : csv_mode::data);
1465
6.29k
    }
1466
1467
    // name
1468
    void before_value(basic_json_visitor<CharT>& visitor, 
1469
        std::error_code& ec)
1470
44.6M
    {
1471
44.6M
        switch (stack_.back())
1472
44.6M
        {
1473
11.5M
            case csv_mode::header:
1474
11.5M
                if (trim_leading_inside_quotes_ || trim_trailing_inside_quotes_)
1475
0
                {
1476
0
                    trim_string_buffer(trim_leading_inside_quotes_,trim_trailing_inside_quotes_);
1477
0
                }
1478
11.5M
                if (line_ == (header_line_+header_line_offset_) && column_index_ >= min_column_names_)
1479
11.5M
                {
1480
11.5M
                    column_names_.push_back(buffer_);
1481
11.5M
                    if (assume_header_ && mapping_kind_ == csv_mapping_kind::n_rows)
1482
11.5M
                    {
1483
11.5M
                        visitor.string_value(buffer_, semantic_tag::none, *this, ec);
1484
11.5M
                        more_ = !cursor_mode_;
1485
11.5M
                    }
1486
11.5M
                }
1487
11.5M
                break;
1488
33.1M
            case csv_mode::data:
1489
33.1M
                if (mapping_kind_ == csv_mapping_kind::n_objects)
1490
0
                {
1491
0
                    if (!(ignore_empty_values_ && buffer_.empty()))
1492
0
                    {
1493
0
                        if (column_index_ < column_names_.size() + offset_)
1494
0
                        {
1495
0
                            visitor.key(column_names_[column_index_ - offset_], *this, ec);
1496
0
                            more_ = !cursor_mode_;
1497
0
                        }
1498
0
                    }
1499
0
                }
1500
33.1M
                break;
1501
0
            default:
1502
0
                break;
1503
44.6M
        }
1504
44.6M
    }
1505
1506
    // Emits begin_array or begin_object for the record that is starting
1507
    // Called at the start of a record.
    //
    // Resets offset_, switches the top of the mode stack from header to data
    // once line_ has passed header_lines_ + header_line_offset_, and then opens
    // the container for the record:
    //   header mode, n_rows   : begin_array, only with assume_header_ and only
    //                           on the header line
    //   data mode,   n_rows   : begin_array
    //   data mode,   n_objects: begin_object
    // Nothing is emitted for m_columns or for any other mode.
    void begin_record(basic_json_visitor<CharT>& visitor, std::error_code& ec)
    {
        offset_ = 0;

        if (stack_.back() == csv_mode::header && line_ > (header_lines_+header_line_offset_))
        {
            stack_.back() = csv_mode::data;
        }

        const csv_mode mode = stack_.back();
        if (mode == csv_mode::header)
        {
            const bool header_row_as_array = mapping_kind_ == csv_mapping_kind::n_rows
                && assume_header_ && line_ == (header_line_+header_line_offset_);
            if (header_row_as_array)
            {
                visitor.begin_array(semantic_tag::none, *this, ec);
                more_ = !cursor_mode_;
                ++level_;
            }
        }
        else if (mode == csv_mode::data)
        {
            if (mapping_kind_ == csv_mapping_kind::n_rows)
            {
                visitor.begin_array(semantic_tag::none, *this, ec);
                more_ = !cursor_mode_;
                ++level_;
            }
            else if (mapping_kind_ == csv_mapping_kind::n_objects)
            {
                visitor.begin_object(semantic_tag::none, *this, ec);
                more_ = !cursor_mode_;
                ++level_;
            }
        }
    }
1555
1556
    // Emits end_array or end_object for the record that has just ended
1557
    // Called at the end of a record.
    //
    // 1. When column types are configured and depth_ is still above zero
    //    (end_value raises depth_ when it opens an array), that array is
    //    closed and depth_ reset.
    // 2. The record container is closed according to the mode on top of the
    //    stack and the mapping kind. In header mode the stack top is switched
    //    to data once line_ >= header_lines_; the decision on what to emit is
    //    still made for header mode.
    // 3. column_index_ is reset for the next record.
    //
    // Wherever level_ equals mark_level_ at a close (n_rows / n_objects only),
    // more_ is forced to false.
    void end_record(basic_json_visitor<CharT>& visitor, std::error_code& ec)
    {
        const bool as_rows = (mapping_kind_ == csv_mapping_kind::n_rows);
        const bool as_objects = (mapping_kind_ == csv_mapping_kind::n_objects);
        const bool as_columns = (mapping_kind_ == csv_mapping_kind::m_columns);

        if (!column_types_.empty() && depth_ > 0 && (as_rows || as_objects || as_columns))
        {
            visitor.end_array(*this, ec);
            more_ = !cursor_mode_;
            if (!as_columns && level_ == mark_level_)
            {
                more_ = false;
            }
            --level_;
            depth_ = 0;
        }

        // Captured before the header branch may overwrite the stack top.
        const csv_mode mode = stack_.back();

        if (mode == csv_mode::header)
        {
            if (line_ >= header_lines_)
            {
                stack_.back() = csv_mode::data;
            }
            if (as_rows)
            {
                if (assume_header_)
                {
                    visitor.end_array(*this, ec);
                    more_ = !cursor_mode_;
                    if (level() == mark_level_)
                    {
                        more_ = false;
                    }
                    --level_;
                }
            }
            else if (as_columns)
            {
                m_columns_filter_.initialize(column_names_);
            }
        }
        else if (mode == csv_mode::data || mode == csv_mode::subfields)
        {
            if (as_rows || as_columns)
            {
                visitor.end_array(*this, ec);
                more_ = !cursor_mode_;
                if (as_rows && level_ == mark_level_)
                {
                    more_ = false;
                }
                --level_;
            }
            else if (as_objects)
            {
                visitor.end_object(*this, ec);
                more_ = !cursor_mode_;
                if (level_ == mark_level_)
                {
                    more_ = false;
                }
                --level_;
            }
        }

        column_index_ = 0;
    }
1654
1655
    void trim_string_buffer(bool trim_leading, bool trim_trailing)
1656
0
    {
1657
0
        std::size_t start = 0;
1658
0
        std::size_t length = buffer_.length();
1659
0
        if (trim_leading)
1660
0
        {
1661
0
            bool done = false;
1662
0
            while (!done && start < buffer_.length())
1663
0
            {
1664
0
                if ((buffer_[start] < 256) && std::isspace(buffer_[start]))
1665
0
                {
1666
0
                    ++start;
1667
0
                }
1668
0
                else
1669
0
                {
1670
0
                    done = true;
1671
0
                }
1672
0
            }
1673
0
        }
1674
0
        if (trim_trailing)
1675
0
        {
1676
0
            bool done = false;
1677
0
            while (!done && length > 0)
1678
0
            {
1679
0
                if ((buffer_[length-1] < 256) && std::isspace(buffer_[length-1]))
1680
0
                {
1681
0
                    --length;
1682
0
                }
1683
0
                else
1684
0
                {
1685
0
                    done = true;
1686
0
                }
1687
0
            }
1688
0
        }
1689
0
        if (start != 0 || length != buffer_.size())
1690
0
        {
1691
            // Do not use buffer_.substr(...), as this won't preserve the allocator state.
1692
0
            buffer_.resize(length);
1693
0
            buffer_.erase(0, start);
1694
0
        }
1695
0
    }
1696
1697
    /*
1698
        end_array, begin_array, xxx_value (end_value)
1699
    */
1700
    // Emits the value for an unquoted field held in buffer_.
    //
    // Only acts in data and subfields mode. Per mapping kind:
    //   n_rows   : null when the field is empty and unquoted_empty_value_is_null_
    //              is set, otherwise end_value with infer_types_.
    //   n_objects: skipped when the field is empty and ignore_empty_values_ is
    //              set; otherwise emitted (same null / end_value choice) when
    //              the column has a name or depth_ > 0.
    //   m_columns: end_value with infer_types_, or skip_column() on the column
    //              filter when the field is empty and ignore_empty_values_ is set.
    void end_unquoted_string_value(basic_json_visitor<CharT>& visitor, std::error_code& ec) 
    {
        const csv_mode mode = stack_.back();
        if (mode != csv_mode::data && mode != csv_mode::subfields)
        {
            return;
        }

        const bool field_is_empty = buffer_.empty();
        const bool skip_empty = ignore_empty_values_ && field_is_empty;

        auto emit_null_or_value = [&]()
        {
            if (unquoted_empty_value_is_null_ && field_is_empty)
            {
                visitor.null_value(semantic_tag::none, *this, ec);
                more_ = !cursor_mode_;
            }
            else
            {
                end_value(visitor, infer_types_, ec);
            }
        };

        if (mapping_kind_ == csv_mapping_kind::n_rows)
        {
            emit_null_or_value();
        }
        else if (mapping_kind_ == csv_mapping_kind::n_objects)
        {
            if (skip_empty)
            {
                return;
            }
            if (column_index_ < column_names_.size() + offset_ || depth_ > 0)
            {
                emit_null_or_value();
            }
        }
        else if (mapping_kind_ == csv_mapping_kind::m_columns)
        {
            if (skip_empty)
            {
                m_columns_filter_.skip_column();
            }
            else
            {
                end_value(visitor, infer_types_, ec);
            }
        }
    }
1764
1765
    void end_quoted_string_value(basic_json_visitor<CharT>& visitor, 
1766
        std::error_code& ec) 
1767
6.11k
    {
1768
6.11k
        switch (stack_.back())
1769
6.11k
        {
1770
5.31k
            case csv_mode::data:
1771
5.31k
            case csv_mode::subfields:
1772
5.31k
                if (trim_leading_inside_quotes_ || trim_trailing_inside_quotes_)
1773
0
                {
1774
0
                    trim_string_buffer(trim_leading_inside_quotes_,trim_trailing_inside_quotes_);
1775
0
                }
1776
5.31k
                switch (mapping_kind_)
1777
5.31k
                {
1778
5.31k
                case csv_mapping_kind::n_rows:
1779
5.31k
                    end_value(visitor, false, ec);
1780
5.31k
                    break;
1781
0
                case csv_mapping_kind::n_objects:
1782
0
                    if (!(ignore_empty_values_ && buffer_.empty()))
1783
0
                    {
1784
0
                        if (column_index_ < column_names_.size() + offset_)
1785
0
                        {
1786
0
                            if (unquoted_empty_value_is_null_ && buffer_.empty())
1787
0
                            {
1788
0
                                visitor.null_value(semantic_tag::none, *this, ec);
1789
0
                                more_ = !cursor_mode_;
1790
0
                            }
1791
0
                            else 
1792
0
                            {
1793
0
                                end_value(visitor, false, ec);
1794
0
                            }
1795
0
                        }
1796
0
                        else if (depth_ > 0)
1797
0
                        {
1798
0
                            if (unquoted_empty_value_is_null_ && buffer_.empty())
1799
0
                            {
1800
0
                                visitor.null_value(semantic_tag::none, *this, ec);
1801
0
                                more_ = !cursor_mode_;
1802
0
                            }
1803
0
                            else
1804
0
                            {
1805
0
                                end_value(visitor, false, ec);
1806
0
                            }
1807
0
                        }
1808
0
                    }
1809
0
                    break;
1810
0
                case csv_mapping_kind::m_columns:
1811
0
                    if (!(ignore_empty_values_ && buffer_.empty()))
1812
0
                    {
1813
0
                        end_value(visitor, false, ec);
1814
0
                    }
1815
0
                    else
1816
0
                    {
1817
0
                        m_columns_filter_.skip_column();
1818
0
                    }
1819
0
                    break;
1820
5.31k
                }
1821
5.31k
                break;
1822
5.31k
            default:
1823
803
                break;
1824
6.11k
        }
1825
6.11k
    }
1826
1827
    void end_value(basic_json_visitor<CharT>& visitor, 
1828
        bool infer_types, std::error_code&  ec)
1829
33.1M
    {
1830
33.1M
        auto it = std::find_if(string_double_map_.begin(), string_double_map_.end(), string_maps_to_double{ buffer_ });
1831
33.1M
        if (it != string_double_map_.end())
1832
0
        {
1833
0
            visitor.double_value((*it).second, semantic_tag::none, *this, ec);
1834
0
            more_ = !cursor_mode_;
1835
0
        }
1836
33.1M
        else if (column_index_ < column_types_.size() + offset_)
1837
0
        {
1838
0
            if (column_types_[column_index_ - offset_].col_type == csv_column_type::repeat_t)
1839
0
            {
1840
0
                offset_ = offset_ + column_types_[column_index_ - offset_].rep_count;
1841
0
                if (column_index_ - offset_ + 1 < column_types_.size())
1842
0
                {
1843
0
                    if (column_index_ == offset_ || depth_ > column_types_[column_index_-offset_].level)
1844
0
                    {
1845
0
                        visitor.end_array(*this, ec);
1846
0
                        more_ = !cursor_mode_;
1847
0
                        if (mapping_kind_ != csv_mapping_kind::m_columns)
1848
0
                        {
1849
0
                            if (level() == mark_level_)
1850
0
                            {
1851
0
                                more_ = false;
1852
0
                            }
1853
0
                        }
1854
0
                        --level_;
1855
0
                    }
1856
0
                    depth_ = column_index_ == offset_ ? 0 : column_types_[column_index_ - offset_].level;
1857
0
                }
1858
0
            }
1859
0
            if (depth_ < column_types_[column_index_ - offset_].level)
1860
0
            {
1861
0
                visitor.begin_array(semantic_tag::none, *this, ec);
1862
0
                more_ = !cursor_mode_;
1863
0
                depth_ = column_types_[column_index_ - offset_].level;
1864
0
                ++level_;
1865
0
            }
1866
0
            else if (depth_ > column_types_[column_index_ - offset_].level)
1867
0
            {
1868
0
                visitor.end_array(*this, ec);
1869
0
                more_ = !cursor_mode_;
1870
0
                if (mapping_kind_ != csv_mapping_kind::m_columns)
1871
0
                {
1872
0
                    if (level() == mark_level_)
1873
0
                    {
1874
0
                        more_ = false;
1875
0
                    }
1876
0
                }
1877
0
                --level_;
1878
0
                depth_ = column_types_[column_index_ - offset_].level;
1879
0
            }
1880
0
            switch (column_types_[column_index_ - offset_].col_type)
1881
0
            {
1882
0
                case csv_column_type::integer_t:
1883
0
                    {
1884
0
                        std::basic_istringstream<CharT,std::char_traits<CharT>,char_allocator_type> iss{buffer_};
1885
0
                        int64_t val;
1886
0
                        iss >> val;
1887
0
                        if (!iss.fail())
1888
0
                        {
1889
0
                            visitor.int64_value(val, semantic_tag::none, *this, ec);
1890
0
                            more_ = !cursor_mode_;
1891
0
                        }
1892
0
                        else
1893
0
                        {
1894
0
                            if (column_index_ - offset_ < column_defaults_.size() && column_defaults_[column_index_ - offset_].length() > 0)
1895
0
                            {
1896
0
                                basic_json_parser<CharT,temp_allocator_type> parser(alloc_);
1897
0
                                parser.update(column_defaults_[column_index_ - offset_].data(),column_defaults_[column_index_ - offset_].length());
1898
0
                                parser.parse_some(visitor);
1899
0
                                parser.finish_parse(visitor);
1900
0
                            }
1901
0
                            else
1902
0
                            {
1903
0
                                visitor.null_value(semantic_tag::none, *this, ec);
1904
0
                                more_ = !cursor_mode_;
1905
0
                            }
1906
0
                        }
1907
0
                    }
1908
0
                    break;
1909
0
                case csv_column_type::float_t:
1910
0
                    {
1911
0
                        if (lossless_number_)
1912
0
                        {
1913
0
                            visitor.string_value(buffer_,semantic_tag::bigdec, *this, ec);
1914
0
                            more_ = !cursor_mode_;
1915
0
                        }
1916
0
                        else
1917
0
                        {
1918
0
                            std::basic_istringstream<CharT, std::char_traits<CharT>, char_allocator_type> iss{ buffer_ };
1919
0
                            double val;
1920
0
                            iss >> val;
1921
0
                            if (!iss.fail())
1922
0
                            {
1923
0
                                visitor.double_value(val, semantic_tag::none, *this, ec);
1924
0
                                more_ = !cursor_mode_;
1925
0
                            }
1926
0
                            else
1927
0
                            {
1928
0
                                if (column_index_ - offset_ < column_defaults_.size() && column_defaults_[column_index_ - offset_].length() > 0)
1929
0
                                {
1930
0
                                    basic_json_parser<CharT,temp_allocator_type> parser(alloc_);
1931
0
                                    parser.update(column_defaults_[column_index_ - offset_].data(),column_defaults_[column_index_ - offset_].length());
1932
0
                                    parser.parse_some(visitor);
1933
0
                                    parser.finish_parse(visitor);
1934
0
                                }
1935
0
                                else
1936
0
                                {
1937
0
                                    visitor.null_value(semantic_tag::none, *this, ec);
1938
0
                                    more_ = !cursor_mode_;
1939
0
                                }
1940
0
                            }
1941
0
                        }
1942
0
                    }
1943
0
                    break;
1944
0
                case csv_column_type::boolean_t:
1945
0
                    {
1946
0
                        if (buffer_.length() == 1 && buffer_[0] == '0')
1947
0
                        {
1948
0
                            visitor.bool_value(false, semantic_tag::none, *this, ec);
1949
0
                            more_ = !cursor_mode_;
1950
0
                        }
1951
0
                        else if (buffer_.length() == 1 && buffer_[0] == '1')
1952
0
                        {
1953
0
                            visitor.bool_value(true, semantic_tag::none, *this, ec);
1954
0
                            more_ = !cursor_mode_;
1955
0
                        }
1956
0
                        else if (buffer_.length() == 5 && ((buffer_[0] == 'f' || buffer_[0] == 'F') && (buffer_[1] == 'a' || buffer_[1] == 'A') && (buffer_[2] == 'l' || buffer_[2] == 'L') && (buffer_[3] == 's' || buffer_[3] == 'S') && (buffer_[4] == 'e' || buffer_[4] == 'E')))
1957
0
                        {
1958
0
                            visitor.bool_value(false, semantic_tag::none, *this, ec);
1959
0
                            more_ = !cursor_mode_;
1960
0
                        }
1961
0
                        else if (buffer_.length() == 4 && ((buffer_[0] == 't' || buffer_[0] == 'T') && (buffer_[1] == 'r' || buffer_[1] == 'R') && (buffer_[2] == 'u' || buffer_[2] == 'U') && (buffer_[3] == 'e' || buffer_[3] == 'E')))
1962
0
                        {
1963
0
                            visitor.bool_value(true, semantic_tag::none, *this, ec);
1964
0
                            more_ = !cursor_mode_;
1965
0
                        }
1966
0
                        else
1967
0
                        {
1968
0
                            if (column_index_ - offset_ < column_defaults_.size() && column_defaults_[column_index_ - offset_].length() > 0)
1969
0
                            {
1970
0
                                basic_json_parser<CharT,temp_allocator_type> parser(alloc_);
1971
0
                                parser.update(column_defaults_[column_index_ - offset_].data(),column_defaults_[column_index_ - offset_].length());
1972
0
                                parser.parse_some(visitor);
1973
0
                                parser.finish_parse(visitor);
1974
0
                            }
1975
0
                            else
1976
0
                            {
1977
0
                                visitor.null_value(semantic_tag::none, *this, ec);
1978
0
                                more_ = !cursor_mode_;
1979
0
                            }
1980
0
                        }
1981
0
                    }
1982
0
                    break;
1983
0
                default:
1984
0
                    if (buffer_.length() > 0)
1985
0
                    {
1986
0
                        visitor.string_value(buffer_, semantic_tag::none, *this, ec);
1987
0
                        more_ = !cursor_mode_;
1988
0
                    }
1989
0
                    else
1990
0
                    {
1991
0
                        if (column_index_ < column_defaults_.size() + offset_ && column_defaults_[column_index_ - offset_].length() > 0)
1992
0
                        {
1993
0
                            basic_json_parser<CharT,temp_allocator_type> parser(alloc_);
1994
0
                            parser.update(column_defaults_[column_index_ - offset_].data(),column_defaults_[column_index_ - offset_].length());
1995
0
                            parser.parse_some(visitor);
1996
0
                            parser.finish_parse(visitor);
1997
0
                        }
1998
0
                        else
1999
0
                        {
2000
0
                            visitor.string_value(string_view_type(), semantic_tag::none, *this, ec);
2001
0
                            more_ = !cursor_mode_;
2002
0
                        }
2003
0
                    }
2004
0
                    break;  
2005
0
            }
2006
0
        }
2007
33.1M
        else
2008
33.1M
        {
2009
33.1M
            if (infer_types)
2010
33.1M
            {
2011
33.1M
                end_value_with_numeric_check(visitor, ec);
2012
33.1M
            }
2013
5.31k
            else
2014
5.31k
            {
2015
5.31k
                visitor.string_value(buffer_, semantic_tag::none, *this, ec);
2016
5.31k
                more_ = !cursor_mode_;
2017
5.31k
            }
2018
33.1M
        }
2019
33.1M
    }
2020
2021
    // States of the scanner in end_value_with_numeric_check, which classifies
    // the text of an untyped field.
    enum class numeric_check_state 
    {
        initial,        // nothing consumed yet
        null,           // whole field spelled "null" (any letter case)
        boolean_true,   // whole field recognised as "true"
        boolean_false,  // whole field spelled "false" (any letter case)
        minus,          // a leading '-' was read
        zero,           // a leading '0' (optionally after '-') was read
        integer,        // reading integer digits that began with 1-9
        fraction1,      // a '.' was just read
        fraction,       // presumably: inside the fractional digits (transition not checked here)
        exp1,           // an 'e' or 'E' was just read
        exp,            // presumably: inside the exponent digits (transition not checked here)
        not_a_number    // the field is not a literal or number; scanning stops
    };
2036
2037
    /*
2038
        xxx_value 
2039
    */
2040
    void end_value_with_numeric_check(basic_json_visitor<CharT>& visitor, 
2041
        std::error_code& ec)
2042
33.1M
    {
2043
33.1M
        numeric_check_state state = numeric_check_state::initial;
2044
33.1M
        bool is_negative = false;
2045
        //int precision = 0;
2046
        //uint8_t decimal_places = 0;
2047
2048
33.1M
        auto last = buffer_.end();
2049
2050
33.1M
        std::string buffer;
2051
63.1M
        for (auto p = buffer_.begin(); state != numeric_check_state::not_a_number && p != last; ++p)
2052
30.0M
        {
2053
30.0M
            switch (state)
2054
30.0M
            {
2055
3.29M
                case numeric_check_state::initial:
2056
3.29M
                {
2057
3.29M
                    switch (*p)
2058
3.29M
                    {
2059
95.1k
                    case 'n':case 'N':
2060
95.1k
                        if ((last-p) == 4 && (p[1] == 'u' || p[1] == 'U') && (p[2] == 'l' || p[2] == 'L') && (p[3] == 'l' || p[3] == 'L'))
2061
69.1k
                        {
2062
69.1k
                            state = numeric_check_state::null;
2063
69.1k
                        }
2064
25.9k
                        else
2065
25.9k
                        {
2066
25.9k
                            state = numeric_check_state::not_a_number;
2067
25.9k
                        }
2068
95.1k
                        break;
2069
139k
                    case 't':case 'T':
2070
139k
                        if ((last-p) == 4 && (p[1] == 'r' || p[1] == 'R') && (p[2] == 'u' || p[2] == 'U') && (p[3] == 'e' || p[3] == 'U'))
2071
123k
                        {
2072
123k
                            state = numeric_check_state::boolean_true;
2073
123k
                        }
2074
15.9k
                        else
2075
15.9k
                        {
2076
15.9k
                            state = numeric_check_state::not_a_number;
2077
15.9k
                        }
2078
139k
                        break;
2079
27.8k
                    case 'f':case 'F':
2080
27.8k
                        if ((last-p) == 5 && (p[1] == 'a' || p[1] == 'A') && (p[2] == 'l' || p[2] == 'L') && (p[3] == 's' || p[3] == 'S') && (p[4] == 'e' || p[4] == 'E'))
2081
4.43k
                        {
2082
4.43k
                            state = numeric_check_state::boolean_false;
2083
4.43k
                        }
2084
23.3k
                        else
2085
23.3k
                        {
2086
23.3k
                            state = numeric_check_state::not_a_number;
2087
23.3k
                        }
2088
27.8k
                        break;
2089
557k
                    case '-':
2090
557k
                        is_negative = true;
2091
557k
                        buffer.push_back(*p);
2092
557k
                        state = numeric_check_state::minus;
2093
557k
                        break;
2094
103k
                    case '0':
2095
                        //++precision;
2096
103k
                        buffer.push_back(*p);
2097
103k
                        state = numeric_check_state::zero;
2098
103k
                        break;
2099
931k
                    case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9':
2100
                        //++precision;
2101
931k
                        buffer.push_back(*p);
2102
931k
                        state = numeric_check_state::integer;
2103
931k
                        break;
2104
1.44M
                    default:
2105
1.44M
                        state = numeric_check_state::not_a_number;
2106
1.44M
                        break;
2107
3.29M
                    }
2108
3.29M
                    break;
2109
3.29M
                }
2110
3.29M
                case numeric_check_state::zero:
2111
45.5k
                {
2112
45.5k
                    switch (*p)
2113
45.5k
                    {
2114
23.0k
                        case '.':
2115
23.0k
                        {
2116
23.0k
                            buffer.push_back('.');
2117
23.0k
                            state = numeric_check_state::fraction1;
2118
23.0k
                        }
2119
23.0k
                        break;
2120
11.0k
                    case 'e':case 'E':
2121
11.0k
                        buffer.push_back(*p);
2122
11.0k
                        state = numeric_check_state::exp1;
2123
11.0k
                        break;
2124
11.5k
                    default:
2125
11.5k
                        state = numeric_check_state::not_a_number;
2126
11.5k
                        break;
2127
45.5k
                    }
2128
45.5k
                    break;
2129
45.5k
                }
2130
23.9M
                case numeric_check_state::integer:
2131
23.9M
                {
2132
23.9M
                    switch (*p)
2133
23.9M
                    {
2134
23.3M
                    case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9':
2135
                        //++precision;
2136
23.3M
                        buffer.push_back(*p);
2137
23.3M
                        break;
2138
58.4k
                    case '.':
2139
58.4k
                        buffer.push_back('.');
2140
58.4k
                        state = numeric_check_state::fraction1;
2141
58.4k
                        break;
2142
594k
                    case 'e':case 'E':
2143
594k
                        buffer.push_back(*p);
2144
594k
                        state = numeric_check_state::exp1;
2145
594k
                        break;
2146
27.5k
                    default:
2147
27.5k
                        state = numeric_check_state::not_a_number;
2148
27.5k
                        break;
2149
23.9M
                    }
2150
23.9M
                    break;
2151
23.9M
                }
2152
23.9M
                case numeric_check_state::minus:
2153
521k
                {
2154
521k
                    switch (*p)
2155
521k
                    {
2156
428k
                    case '0':
2157
                        //++precision;
2158
428k
                        buffer.push_back(*p);
2159
428k
                        state = numeric_check_state::zero;
2160
428k
                        break;
2161
80.6k
                    case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9':
2162
                        //++precision;
2163
80.6k
                        buffer.push_back(*p);
2164
80.6k
                        state = numeric_check_state::integer;
2165
80.6k
                        break;
2166
12.7k
                    default:
2167
12.7k
                        state = numeric_check_state::not_a_number;
2168
12.7k
                        break;
2169
521k
                    }
2170
521k
                    break;
2171
521k
                }
2172
521k
                case numeric_check_state::fraction1:
2173
79.1k
                {
2174
79.1k
                    switch (*p)
2175
79.1k
                    {
2176
75.2k
                    case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9':
2177
                        //++precision;
2178
                        //++decimal_places;
2179
75.2k
                        buffer.push_back(*p);
2180
75.2k
                        state = numeric_check_state::fraction;
2181
75.2k
                        break;
2182
3.97k
                    default:
2183
3.97k
                        state = numeric_check_state::not_a_number;
2184
3.97k
                        break;
2185
79.1k
                    }
2186
79.1k
                    break;
2187
79.1k
                }
2188
157k
                case numeric_check_state::fraction:
2189
157k
                {
2190
157k
                    switch (*p)
2191
157k
                    {
2192
149k
                    case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9':
2193
                        //++precision;
2194
                        //++decimal_places;
2195
149k
                        buffer.push_back(*p);
2196
149k
                        break;
2197
4.06k
                    case 'e':case 'E':
2198
4.06k
                        buffer.push_back(*p);
2199
4.06k
                        state = numeric_check_state::exp1;
2200
4.06k
                        break;
2201
4.34k
                    default:
2202
4.34k
                        state = numeric_check_state::not_a_number;
2203
4.34k
                        break;
2204
157k
                    }
2205
157k
                    break;
2206
157k
                }
2207
623k
                case numeric_check_state::exp1:
2208
623k
                {
2209
623k
                    switch (*p)
2210
623k
                    {
2211
19.3k
                    case '-':
2212
19.3k
                        buffer.push_back(*p);
2213
19.3k
                        break;
2214
2.39k
                    case '+':
2215
2.39k
                        break;
2216
595k
                    case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9':
2217
595k
                        state = numeric_check_state::exp;
2218
595k
                        buffer.push_back(*p);
2219
595k
                        break;
2220
6.58k
                    default:
2221
6.58k
                        state = numeric_check_state::not_a_number;
2222
6.58k
                        break;
2223
623k
                    }
2224
623k
                    break;
2225
623k
                }
2226
715k
                case numeric_check_state::exp:
2227
715k
                {
2228
715k
                    switch (*p)
2229
715k
                    {
2230
703k
                    case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9':
2231
703k
                        buffer.push_back(*p);
2232
703k
                        break;
2233
12.3k
                    default:
2234
12.3k
                        state = numeric_check_state::not_a_number;
2235
12.3k
                        break;
2236
715k
                    }
2237
715k
                    break;
2238
715k
                }
2239
715k
                default:
2240
596k
                    break;
2241
30.0M
            }
2242
30.0M
        }
2243
2244
33.1M
        switch (state)
2245
33.1M
        {
2246
69.1k
            case numeric_check_state::null:
2247
69.1k
                visitor.null_value(semantic_tag::none, *this, ec);
2248
69.1k
                more_ = !cursor_mode_;
2249
69.1k
                break;
2250
123k
            case numeric_check_state::boolean_true:
2251
123k
                visitor.bool_value(true, semantic_tag::none, *this, ec);
2252
123k
                more_ = !cursor_mode_;
2253
123k
                break;
2254
4.43k
            case numeric_check_state::boolean_false:
2255
4.43k
                visitor.bool_value(false, semantic_tag::none, *this, ec);
2256
4.43k
                more_ = !cursor_mode_;
2257
4.43k
                break;
2258
486k
            case numeric_check_state::zero:
2259
817k
            case numeric_check_state::integer:
2260
817k
            {
2261
817k
                if (is_negative)
2262
468k
                {
2263
468k
                    int64_t val{ 0 };
2264
468k
                    auto result = jsoncons::utility::dec_to_integer(buffer_.data(), buffer_.length(), val);
2265
468k
                    if (result)
2266
458k
                    {
2267
458k
                        visitor.int64_value(val, semantic_tag::none, *this, ec);
2268
458k
                        more_ = !cursor_mode_;
2269
458k
                    }
2270
9.78k
                    else // Must be overflow
2271
9.78k
                    {
2272
9.78k
                        visitor.string_value(buffer_, semantic_tag::bigint, *this, ec);
2273
9.78k
                        more_ = !cursor_mode_;
2274
9.78k
                    }
2275
468k
                }
2276
349k
                else
2277
349k
                {
2278
349k
                    uint64_t val{ 0 };
2279
349k
                    auto result = jsoncons::utility::dec_to_integer(buffer_.data(), buffer_.length(), val);
2280
349k
                    if (result)
2281
341k
                    {
2282
341k
                        visitor.uint64_value(val, semantic_tag::none, *this, ec);
2283
341k
                        more_ = !cursor_mode_;
2284
341k
                    }
2285
7.13k
                    else if (result.ec == std::errc::result_out_of_range)
2286
7.13k
                    {
2287
7.13k
                        visitor.string_value(buffer_, semantic_tag::bigint, *this, ec);
2288
7.13k
                        more_ = !cursor_mode_;
2289
7.13k
                    }
2290
0
                    else
2291
0
                    {
2292
0
                        ec = csv_errc::invalid_number; 
2293
0
                        more_ = false;
2294
0
                        return;
2295
0
                    }
2296
349k
                }
2297
817k
                break;
2298
817k
            }
2299
817k
            case numeric_check_state::fraction:
2300
649k
            case numeric_check_state::exp:
2301
649k
            {
2302
649k
                if (lossless_number_)
2303
0
                {
2304
0
                    visitor.string_value(buffer_,semantic_tag::bigdec, *this, ec);
2305
0
                    more_ = !cursor_mode_;
2306
0
                }
2307
649k
                else
2308
649k
                {
2309
649k
                    double d{0};
2310
649k
                    auto result = jsoncons::utility::decstr_to_double(buffer.c_str(), buffer.length(), d);
2311
649k
                    if (result.ec == std::errc::result_out_of_range)
2312
5.80k
                    {
2313
5.80k
                        d = buffer.front() == '-' ? -HUGE_VAL : HUGE_VAL;
2314
5.80k
                    }
2315
643k
                    else if (result.ec == std::errc::invalid_argument)
2316
8
                    {
2317
8
                        ec = csv_errc::invalid_number; 
2318
8
                        more_ = false;
2319
8
                        return;
2320
8
                    }
2321
649k
                    visitor.double_value(d, semantic_tag::none, *this, ec);
2322
649k
                    more_ = !cursor_mode_;
2323
649k
                }
2324
649k
                break;
2325
649k
            }
2326
31.4M
            default:
2327
31.4M
            {
2328
31.4M
                visitor.string_value(buffer_, semantic_tag::none, *this, ec);
2329
31.4M
                more_ = !cursor_mode_;
2330
31.4M
                break;
2331
649k
            }
2332
33.1M
        }
2333
33.1M
    } 
2334
2335
    void push_state(csv_parse_state state) // Saves `state` on top of state_stack_ so pop_state() can hand it back later.
2336
10.0M
    {
2337
10.0M
        state_stack_.push_back(state); // appended at the back; pop_state() reads and removes from the back
2338
10.0M
    }
2339
2340
    csv_parse_state pop_state() // Removes the most recently pushed state from state_stack_ and returns it.
2341
10.0M
    {
2342
10.0M
        JSONCONS_ASSERT(!state_stack_.empty()) // precondition: a matching push_state() must have happened (failure behavior is defined by the macro, not visible here)
2343
10.0M
        csv_parse_state state = state_stack_.back(); // copy the top entry before it is removed
2344
10.0M
        state_stack_.pop_back(); // drop the entry that was just copied
2345
10.0M
        return state;
2346
10.0M
    }
2347
};
2348
2349
using csv_parser = basic_csv_parser<char>; // convenience alias: basic_csv_parser instantiated for char
2350
using wcsv_parser = basic_csv_parser<wchar_t>; // convenience alias: basic_csv_parser instantiated for wchar_t
2351
2352
}}
2353
2354
#endif
2355