Coverage Report

Created: 2025-11-24 06:07

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/jsoncons/include/jsoncons_ext/csv/csv_parser.hpp
Line
Count
Source
1
// Copyright 2013-2025 Daniel Parker
2
// Distributed under the Boost license, Version 1.0.
3
// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
4
5
// See https://github.com/danielaparker/jsoncons for latest version
6
7
#ifndef JSONCONS_CSV_CSV_PARSER_HPP
8
#define JSONCONS_CSV_CSV_PARSER_HPP
9
10
#include <cctype>
11
#include <cstddef>
12
#include <cstdint>
13
#include <functional>
14
#include <memory> // std::allocator
15
#include <sstream>
16
#include <string>
17
#include <system_error>
18
#include <vector>
19
20
#include <jsoncons/config/compiler_support.hpp>
21
#include <jsoncons/config/jsoncons_config.hpp>
22
#include <jsoncons/utility/read_number.hpp>
23
#include <jsoncons/json_exception.hpp>
24
#include <jsoncons/json_filter.hpp>
25
#include <jsoncons/json_reader.hpp>
26
#include <jsoncons/json_type.hpp>
27
#include <jsoncons/json_visitor.hpp>
28
#include <jsoncons/semantic_tag.hpp>
29
#include <jsoncons/ser_util.hpp>
30
#include <jsoncons/staj_event.hpp>
31
32
#include <jsoncons_ext/csv/csv_error.hpp>
33
#include <jsoncons_ext/csv/csv_options.hpp>
34
35
namespace jsoncons { 
36
namespace csv {
37
38
// Mode values; basic_csv_parser keeps a vector of these in its `stack_` member.
// NOTE(review): the names suggest "nothing read yet", "reading header lines",
// "reading data records" and "inside a subfield list" - confirm against the
// parser state machine, which is not part of this listing.
enum class csv_mode 
39
{
40
    initial,
41
    header,
42
    data,
43
    subfields
44
};
45
46
// States of the CSV parser. basic_csv_parser stores the current one in
// `state_` and keeps a vector of them in `state_stack_`.
// Visible uses in this file: the parser begins in `start` (constructors and
// reinitialize()), done() tests for `done`, and accept() tests for `accept`
// or `done`. The transitions between the remaining states live in the parsing
// code, which is not part of this listing.
enum class csv_parse_state 
47
{
48
    start,
49
    cr, 
50
    expect_comment_or_record,
51
    expect_record,
52
    end_record,
53
    no_more_records,
54
    comment,
55
    between_values,
56
    quoted_string,
57
    unquoted_string,
58
    before_unquoted_string,
59
    escaped_value,
60
    minus, 
61
    zero,  
62
    integer,
63
    fraction,
64
    exp1,
65
    exp2,
66
    exp3,
67
    accept,
68
    before_unquoted_field,
69
    before_unquoted_field_tail, 
70
    before_unquoted_field_tail1,
71
    before_last_unquoted_field,
72
    before_last_unquoted_field_tail,
73
    before_unquoted_subfield,
74
    before_unquoted_subfield_tail,
75
    before_quoted_subfield,
76
    before_quoted_subfield_tail,
77
    before_quoted_field,
78
    before_quoted_field_tail,
79
    before_last_quoted_field,
80
    before_last_quoted_field_tail,
81
    done
82
};
83
84
// Replay position used by detail::m_columns_filter::replay_parse_events.
// Each value names the next thing that replay loop will do.
enum class cached_state
85
{
86
    // Emit begin_object, then move on to `name`.
    begin_object,
87
    // Emit end_object, then move on to `done`.
    end_object,
88
    // Emit begin_array for the current column, then move on to `item`.
    begin_array,
89
    // Emit end_array, advance to the next column, then go back to `name`.
    end_array,
90
    // Emit the current column's key, or switch to `end_object` when no columns remain.
    name,
91
    // Replay the next cached event of the current column, or switch to `end_array`.
    item,
92
    // Nothing left to replay; the replay loop stops.
    done
93
};
94
95
// Default error-handler functor: basic_csv_parser's constructors install an
// instance of it in `err_handler_` when the caller supplies no handler.
// It ignores both arguments and always returns false.
// NOTE(review): presumably `false` means "do not continue after this error";
// the code that interprets the result is not part of this listing - confirm.
struct default_csv_parsing
96
{
97
    bool operator()(csv_errc, const ser_context&) noexcept
98
0
    {
99
0
        return false;
100
0
    }
101
};
102
103
namespace detail {
104
105
    // One cached visitor event: its type, semantic tag and an owned copy of
    // its payload, so it can be sent to a visitor later through replay().
    // CharT is the character type of string payloads; TempAlloc is rebound to
    // allocate the string and byte-string storage.
    template <typename CharT,typename TempAlloc >
106
    class parse_event
107
    {
108
        using temp_allocator_type = TempAlloc;
109
        using string_view_type = typename basic_json_visitor<CharT>::string_view_type;
110
        using char_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<CharT>;
111
        using byte_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<uint8_t>;                  
112
        using string_type = std::basic_string<CharT,std::char_traits<CharT>,char_allocator_type>;
113
        using byte_string_type = basic_byte_string<byte_allocator_type>;
114
115
        staj_event_type event_type;
116
        // Filled only by the string constructor; empty for every other event.
        string_type string_value;
117
        // Filled only by the byte-string constructor; empty for every other event.
        byte_string_type byte_string_value;
118
        // Scalar payload. Only the bool/int64/uint64/double constructors set a
        // member; replay() reads the member that matches event_type.
        union
119
        {
120
            bool bool_value;
121
            int64_t int64_value;
122
            uint64_t uint64_value;
123
            double double_value;
124
        };
125
        semantic_tag tag;
126
    public:
127
        // Payload-free event (m_columns_filter uses it for begin_array,
        // end_array and null_value). The scalar union is left unset.
        parse_event(staj_event_type event_type, semantic_tag tag, const TempAlloc& alloc)
128
0
            : event_type(event_type), 
129
0
              string_value(alloc),
130
0
              byte_string_value(alloc),
131
0
              tag(tag)
132
0
        {
133
0
        }
134
135
        // String event: copies the characters of `value` into owned storage.
        parse_event(const string_view_type& value, semantic_tag tag, const TempAlloc& alloc)
136
0
            : event_type(staj_event_type::string_value), 
137
0
              string_value(value.data(),value.length(),alloc), 
138
0
              byte_string_value(alloc),
139
0
              tag(tag)
140
0
        {
141
0
        }
142
143
        // Byte-string event: copies the bytes of `value` into owned storage.
        parse_event(const byte_string_view& value, semantic_tag tag, const TempAlloc& alloc)
144
0
            : event_type(staj_event_type::byte_string_value), 
145
0
              string_value(alloc),
146
0
              byte_string_value(value.data(),value.size(),alloc), 
147
0
              tag(tag)
148
0
        {
149
0
        }
150
151
        // Boolean event.
        parse_event(bool value, semantic_tag tag, const TempAlloc& alloc)
152
0
            : event_type(staj_event_type::bool_value), 
153
0
              string_value(alloc),
154
0
              byte_string_value(alloc),
155
0
              bool_value(value), 
156
0
              tag(tag)
157
0
        {
158
0
        }
159
160
        // Signed integer event.
        parse_event(int64_t value, semantic_tag tag, const TempAlloc& alloc)
161
0
            : event_type(staj_event_type::int64_value), 
162
0
              string_value(alloc),
163
0
              byte_string_value(alloc),
164
0
              int64_value(value), 
165
0
              tag(tag)
166
0
        {
167
0
        }
168
169
        // Unsigned integer event.
        parse_event(uint64_t value, semantic_tag tag, const TempAlloc& alloc)
170
0
            : event_type(staj_event_type::uint64_value), 
171
0
              string_value(alloc),
172
0
              byte_string_value(alloc),
173
0
              uint64_value(value), 
174
0
              tag(tag)
175
0
        {
176
0
        }
177
178
        // Floating-point event.
        parse_event(double value, semantic_tag tag, const TempAlloc& alloc)
179
0
            : event_type(staj_event_type::double_value), 
180
0
              string_value(alloc),
181
0
              byte_string_value(alloc),
182
0
              double_value(value), 
183
0
              tag(tag)
184
0
        {
185
0
        }
186
187
        parse_event(const parse_event&) = default;
188
0
        parse_event(parse_event&&) = default;
189
        parse_event& operator=(const parse_event&) = default;
190
        parse_event& operator=(parse_event&&) = default;
191
192
        // Sends this event to `visitor`, passing a default-constructed
        // ser_context. Event types without a case below (the default branch)
        // produce no visitor call.
        void replay(basic_json_visitor<CharT>& visitor) const
193
0
        {
194
0
            switch (event_type)
195
0
            {
196
0
                case staj_event_type::begin_array:
197
0
                    visitor.begin_array(tag, ser_context());
198
0
                    break;
199
0
                case staj_event_type::end_array:
200
0
                    visitor.end_array(ser_context());
201
0
                    break;
202
0
                case staj_event_type::string_value:
203
0
                    visitor.string_value(string_value, tag, ser_context());
204
0
                    break;
205
0
                case staj_event_type::byte_string_value:
206
0
                    visitor.byte_string_value(byte_string_value, tag, ser_context());
207
0
                    break;
208
0
                case staj_event_type::null_value:
209
0
                    visitor.null_value(tag, ser_context());
210
0
                    break;
211
0
                case staj_event_type::bool_value:
212
0
                    visitor.bool_value(bool_value, tag, ser_context());
213
0
                    break;
214
0
                case staj_event_type::int64_value:
215
0
                    visitor.int64_value(int64_value, tag, ser_context());
216
0
                    break;
217
0
                case staj_event_type::uint64_value:
218
0
                    visitor.uint64_value(uint64_value, tag, ser_context());
219
0
                    break;
220
0
                case staj_event_type::double_value:
221
0
                    visitor.double_value(double_value, tag, ser_context());
222
0
                    break;
223
0
                default:
224
0
                    break;
225
0
            }
226
0
        }
227
    };
228
229
    // Visitor that caches incoming scalar and array events per column, then
    // replays them (replay_parse_events) as one object whose keys are the
    // column names and whose values are arrays of the cached events.
    // Object and key events are rejected with csv_errc::invalid_parse_state.
    template <typename CharT,typename TempAlloc >
230
    class m_columns_filter : public basic_json_visitor<CharT>
231
    {
232
    public:
233
        using string_view_type = typename basic_json_visitor<CharT>::string_view_type;
234
        using char_type = CharT;
235
        using temp_allocator_type = TempAlloc;
236
237
        using char_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<CharT>;
238
        using string_type = std::basic_string<CharT,std::char_traits<CharT>,char_allocator_type>;
239
240
        using string_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<string_type>;
241
        using parse_event_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<parse_event<CharT,TempAlloc>>;
242
        using parse_event_vector_type = std::vector<parse_event<CharT,TempAlloc>, parse_event_allocator_type>;
243
        using parse_event_vector_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<parse_event_vector_type>;
244
    private:
245
        TempAlloc alloc_;
246
        // Caching side: index of the column the next incoming event is stored under.
        std::size_t name_index_{0};
247
        // Replay side: incremented per begin_object/begin_array emitted,
        // decremented per end_object/end_array emitted.
        int level_{0};
248
        // Caching side: number of cached begin_array events not yet closed.
        int level2_{0};
249
        // Replay side: what replay_parse_events does next.
        cached_state state_{cached_state::begin_object};
250
        // Replay side: column currently being emitted.
        std::size_t column_index_{0};
251
        // Replay side: next cached event of the current column to emit.
        std::size_t row_index_{0};
252
253
        std::vector<string_type, string_allocator_type> column_names_;
254
        // cached_events_[i] holds the events cached for column_names_[i].
        std::vector<parse_event_vector_type,parse_event_vector_allocator_type> cached_events_;
255
    public:
256
257
        // Creates an empty filter; column names are supplied later via initialize().
        m_columns_filter(const TempAlloc& alloc)
258
2.52k
            : alloc_(alloc),
259
2.52k
              column_names_(alloc),
260
2.52k
              cached_events_(alloc)
261
2.52k
        {
262
2.52k
        }
263
264
        // Returns the filter to its freshly constructed state: counters and
        // replay state reset, column names and cached events discarded.
        void reset()
265
        {
266
            name_index_ = 0;
267
            level_ = 0;
268
            level2_ = 0;
269
            state_ = cached_state::begin_object;
270
            column_index_ = 0;
271
            row_index_ = 0;
272
            column_names_.clear();
273
            cached_events_.clear();
274
        }
275
276
        // True once the replay loop has emitted the closing end_object.
        bool done() const
277
0
        {
278
0
            return state_ == cached_state::done;
279
0
        }
280
281
        // Appends `column_names` and one empty event cache per name, then
        // resets the counters and replay state. Existing names and caches are
        // not cleared here, so repeated calls accumulate columns.
        void initialize(const std::vector<string_type, string_allocator_type>& column_names)
282
0
        {
283
0
            for (const auto& name : column_names)
284
0
            {
285
0
                column_names_.push_back(name);
286
0
                cached_events_.emplace_back(alloc_);
287
0
            }
288
0
            name_index_ = 0;
289
0
            level_ = 0;
290
0
            level2_ = 0;
291
0
            column_index_ = 0;
292
0
            row_index_ = 0;
293
0
            state_ = cached_state::begin_object;
294
0
        }
295
296
        // Advances to the next column without caching anything for the current one.
        void skip_column()
297
0
        {
298
0
            ++name_index_;
299
0
        }
300
        
301
        // Current replay nesting level. level_ is already an int, so the cast
        // does not change the value.
        int level() const
302
        {
303
            return static_cast<int>(level_);
304
        }
305
306
        // Replays cached content to `visitor` as
        //   { name0: [events...], name1: [events...], ... }
        // driven by state_. With cursor_mode true the loop stops after each
        // emitted event; otherwise it runs on. In either mode it stops after
        // an end_object/end_array emitted while level_ == mark_level, and when
        // state_ is `done`.
        // The loop only exits once `more` is false, so the value returned is
        // always false.
        bool replay_parse_events(basic_json_visitor<CharT>& visitor, bool cursor_mode, int mark_level)
307
0
        {
308
0
            bool more = true;
309
0
            while (more)
310
0
            {
311
0
                switch (state_)
312
0
                {
313
0
                    case cached_state::begin_object:
314
0
                        visitor.begin_object(semantic_tag::none, ser_context());
315
0
                        ++level_;
316
0
                        more = !cursor_mode;
317
0
                        column_index_ = 0;
318
0
                        state_ = cached_state::name;
319
0
                        break;
320
0
                    case cached_state::end_object:
321
0
                        visitor.end_object(ser_context());
322
0
                        more = !cursor_mode;
323
0
                        // Compared before the decrement: mark_level refers to
                        // the level of the structure being closed.
                        if (level_ == mark_level)
324
0
                        {
325
0
                            more = false;
326
0
                        }
327
0
                        --level_;
328
0
                        state_ = cached_state::done;
329
0
                        break;
330
0
                    case cached_state::name:
331
0
                        if (column_index_ < column_names_.size())
332
0
                        {
333
0
                            visitor.key(column_names_[column_index_], ser_context());
334
0
                            more = !cursor_mode;
335
0
                            state_ = cached_state::begin_array;
336
0
                        }
337
0
                        else
338
0
                        {
339
0
                            // No columns left; nothing is emitted in this
                            // iteration, so `more` is left unchanged.
                            state_ = cached_state::end_object;
340
0
                        }
341
0
                        break;
342
0
                    case cached_state::begin_array:
343
0
                        visitor.begin_array(semantic_tag::none, ser_context());
344
0
                        ++level_;
345
0
                        more = !cursor_mode;
346
0
                        row_index_ = 0;
347
0
                        state_ = cached_state::item;
348
0
                        break;
349
0
                    case cached_state::end_array:
350
0
                        visitor.end_array(ser_context());
351
0
                        more = !cursor_mode;
352
0
                        if (level_ == mark_level)
353
0
                        {
354
0
                            more = false;
355
0
                        }
356
0
                        --level_;
357
0
                        ++column_index_;
358
0
                        state_ = cached_state::name;
359
0
                        break;
360
0
                    case cached_state::item:
361
0
                        if (row_index_ < cached_events_[column_index_].size())
362
0
                        {
363
0
                            cached_events_[column_index_][row_index_].replay(visitor);
364
0
                            more = !cursor_mode;
365
0
                            ++row_index_;
366
0
                        }
367
0
                        else
368
0
                        {
369
0
                            // Column exhausted; nothing is emitted in this iteration.
                            state_ = cached_state::end_array;
370
0
                        }
371
0
                        break;
372
0
                    default:
373
0
                        more = false;
374
0
                        break;
375
0
                }
376
0
            }
377
0
            return more;
378
0
        }
379
380
        // Nothing to flush; events are only cached.
        void visit_flush() override
381
0
        {
382
0
        }
383
384
        // Objects are not accepted by this filter: reports invalid_parse_state.
        JSONCONS_VISITOR_RETURN_TYPE visit_begin_object(semantic_tag, const ser_context&, std::error_code& ec) override
385
0
        {
386
0
            ec = csv_errc::invalid_parse_state;
387
0
            JSONCONS_VISITOR_RETURN;
388
0
        }
389
390
        // Objects are not accepted by this filter: reports invalid_parse_state.
        JSONCONS_VISITOR_RETURN_TYPE visit_end_object(const ser_context&, std::error_code& ec) override
391
0
        {
392
0
            ec = csv_errc::invalid_parse_state;
393
0
            JSONCONS_VISITOR_RETURN;
394
0
        }
395
396
        // Caches a begin_array under the current column and records that an
        // array is open. Ignored when name_index_ is past the last column.
        JSONCONS_VISITOR_RETURN_TYPE visit_begin_array(semantic_tag tag, const ser_context&, std::error_code&) override
397
0
        {
398
0
            if (name_index_ < column_names_.size())
399
0
            {
400
0
                cached_events_[name_index_].emplace_back(staj_event_type::begin_array, tag, alloc_);
401
                
402
0
                ++level2_;
403
0
            }
404
0
            JSONCONS_VISITOR_RETURN;
405
0
        }
406
407
        // With a cached array open: caches the end_array, closes it and moves
        // on to the next column. With none open: resets name_index_ to 0.
        // NOTE(review): the no-array-open case presumably marks the end of a
        // row - confirm against the caller. Also, with two arrays open at once
        // the first end_array already advances name_index_; whether the parser
        // can ever nest arrays here is not visible in this listing.
        JSONCONS_VISITOR_RETURN_TYPE visit_end_array(const ser_context&, std::error_code&) override
408
0
        {
409
0
            if (level2_ > 0)
410
0
            {
411
0
                cached_events_[name_index_].emplace_back(staj_event_type::end_array, semantic_tag::none, alloc_);
412
0
                ++name_index_;
413
0
                --level2_;
414
0
            }
415
0
            else
416
0
            {
417
0
                name_index_ = 0;
418
0
            }
419
0
            JSONCONS_VISITOR_RETURN;
420
0
        }
421
422
        // Keys are not accepted by this filter: reports invalid_parse_state.
        JSONCONS_VISITOR_RETURN_TYPE visit_key(const string_view_type&, const ser_context&, std::error_code& ec) override
423
0
        {
424
0
            ec = csv_errc::invalid_parse_state;
425
0
            JSONCONS_VISITOR_RETURN;
426
0
        }
427
428
        // The scalar handlers below all follow one pattern: if the current
        // column index is in range, cache the value under that column and,
        // unless a cached array is open (level2_ > 0), advance to the next
        // column. Values arriving past the last column are dropped.
        JSONCONS_VISITOR_RETURN_TYPE visit_null(semantic_tag tag, const ser_context&, std::error_code&) override
429
0
        {
430
0
            if (name_index_ < column_names_.size())
431
0
            {
432
0
                cached_events_[name_index_].emplace_back(staj_event_type::null_value, tag, alloc_);
433
0
                if (level2_ == 0)
434
0
                {
435
0
                    ++name_index_;
436
0
                }
437
0
            }
438
0
            JSONCONS_VISITOR_RETURN;
439
0
        }
440
441
        // Caches a string value (the characters are copied by parse_event).
        JSONCONS_VISITOR_RETURN_TYPE visit_string(const string_view_type& value, semantic_tag tag, const ser_context&, std::error_code&) override
442
0
        {
443
0
            if (name_index_ < column_names_.size())
444
0
            {
445
0
                cached_events_[name_index_].emplace_back(value, tag, alloc_);
446
447
0
                if (level2_ == 0)
448
0
                {
449
0
                    ++name_index_;
450
0
                }
451
0
            }
452
0
            JSONCONS_VISITOR_RETURN;
453
0
        }
454
455
        // Caches a byte-string value (the bytes are copied by parse_event).
        JSONCONS_VISITOR_RETURN_TYPE visit_byte_string(const byte_string_view& value,
456
                                  semantic_tag tag,
457
                                  const ser_context&,
458
                                  std::error_code&) override
459
0
        {
460
0
            if (name_index_ < column_names_.size())
461
0
            {
462
0
                cached_events_[name_index_].emplace_back(value, tag, alloc_);
463
0
                if (level2_ == 0)
464
0
                {
465
0
                    ++name_index_;
466
0
                }
467
0
            }
468
0
            JSONCONS_VISITOR_RETURN;
469
0
        }
470
471
        // Caches a double value.
        JSONCONS_VISITOR_RETURN_TYPE visit_double(double value,
472
                             semantic_tag tag, 
473
                             const ser_context&,
474
                             std::error_code&) override
475
0
        {
476
0
            if (name_index_ < column_names_.size())
477
0
            {
478
0
                cached_events_[name_index_].emplace_back(value, tag, alloc_);
479
0
                if (level2_ == 0)
480
0
                {
481
0
                    ++name_index_;
482
0
                }
483
0
            }
484
0
            JSONCONS_VISITOR_RETURN;
485
0
        }
486
487
        // Caches a signed integer value.
        JSONCONS_VISITOR_RETURN_TYPE visit_int64(int64_t value,
488
                            semantic_tag tag,
489
                            const ser_context&,
490
                            std::error_code&) override
491
0
        {
492
0
            if (name_index_ < column_names_.size())
493
0
            {
494
0
                cached_events_[name_index_].emplace_back(value, tag, alloc_);
495
0
                if (level2_ == 0)
496
0
                {
497
0
                    ++name_index_;
498
0
                }
499
0
            }
500
0
            JSONCONS_VISITOR_RETURN;
501
0
        }
502
503
        // Caches an unsigned integer value.
        JSONCONS_VISITOR_RETURN_TYPE visit_uint64(uint64_t value,
504
                             semantic_tag tag,
505
                             const ser_context&,
506
                             std::error_code&) override
507
0
        {
508
0
            if (name_index_ < column_names_.size())
509
0
            {
510
0
                cached_events_[name_index_].emplace_back(value, tag, alloc_);
511
0
                if (level2_ == 0)
512
0
                {
513
0
                    ++name_index_;
514
0
                }
515
0
            }
516
0
            JSONCONS_VISITOR_RETURN;
517
0
        }
518
519
        // Caches a boolean value.
        JSONCONS_VISITOR_RETURN_TYPE visit_bool(bool value, semantic_tag tag, const ser_context&, std::error_code&) override
520
0
        {
521
0
            if (name_index_ < column_names_.size())
522
0
            {
523
0
                cached_events_[name_index_].emplace_back(value, tag, alloc_);
524
0
                if (level2_ == 0)
525
0
                {
526
0
                    ++name_index_;
527
0
                }
528
0
            }
529
0
            JSONCONS_VISITOR_RETURN;
530
0
        }
531
    };
532
533
} // namespace detail
534
535
template <typename CharT,typename TempAlloc =std::allocator<char>>
536
class basic_csv_parser : public ser_context
537
{
538
public:
539
    using string_view_type = jsoncons::basic_string_view<CharT>;
540
    using char_type = CharT;
541
private:
542
    // Unary predicate over (string, double) pairs: true when the pair's
    // string equals the stored view `s`.
    // NOTE(review): presumably used to search string_double_map_; the call
    // site is not part of this listing.
    struct string_maps_to_double
543
    {
544
        string_view_type s;
545
546
        bool operator()(const std::pair<string_view_type,double>& val) const
547
0
        {
548
0
            return val.first == s;
549
0
        }
550
    };
551
552
    using temp_allocator_type = TempAlloc;
553
    typedef typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<CharT> char_allocator_type;
554
    using string_type = std::basic_string<CharT,std::char_traits<CharT>,char_allocator_type>;
555
    typedef typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<string_type> string_allocator_type;
556
    typedef typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<csv_mode> csv_mode_allocator_type;
557
    typedef typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<csv_type_info> csv_type_info_allocator_type;
558
    typedef typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<std::vector<string_type,string_allocator_type>> string_vector_allocator_type;
559
    typedef typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<csv_parse_state> csv_parse_state_allocator_type;
560
561
    static constexpr int default_depth = 3;
562
563
    temp_allocator_type alloc_;
564
    csv_parse_state state_;
565
    std::function<bool(csv_errc,const ser_context&)> err_handler_;
566
    std::size_t column_{1};
567
    std::size_t line_{1};
568
    int nesting_depth_{default_depth};
569
    bool assume_header_;
570
    char_type comment_starter_;
571
    char_type field_delimiter_;
572
    std::size_t header_lines_;
573
    bool ignore_empty_values_;
574
    bool ignore_empty_lines_;
575
    bool infer_types_;
576
    bool lossless_number_; 
577
    csv_mapping_kind mapping_kind_;
578
    std::size_t max_lines_;
579
    char_type quote_char_;
580
    char_type quote_escape_char_;
581
    char_type subfield_delimiter_;
582
    bool trim_leading_;
583
    bool trim_leading_inside_quotes_;
584
    bool trim_trailing_;
585
    bool trim_trailing_inside_quotes_;
586
    bool unquoted_empty_value_is_null_;
587
    std::size_t min_column_names_{0};
588
    std::size_t column_index_{0};
589
    int level_{0};
590
    std::size_t depth_{0};
591
    std::size_t offset_{0};
592
    const CharT* begin_input_{nullptr};
593
    const CharT* input_end_{nullptr};
594
    const CharT* input_ptr_{nullptr};
595
    bool more_{true};
596
    std::size_t header_line_{1};
597
    bool cursor_mode_{false};
598
    bool actual_cursor_mode_{false};
599
    int mark_level_{0};
600
    std::size_t header_line_offset_{0};
601
602
    detail::m_columns_filter<CharT,TempAlloc> m_columns_filter_;
603
    std::vector<csv_mode,csv_mode_allocator_type> stack_;
604
    std::vector<string_type,string_allocator_type> column_names_;
605
    std::vector<csv_type_info,csv_type_info_allocator_type> column_types_;
606
    std::vector<string_type,string_allocator_type> column_defaults_;
607
    std::vector<csv_parse_state,csv_parse_state_allocator_type> state_stack_;
608
    string_type buffer_;
609
    std::vector<std::pair<std::basic_string<char_type>,double>> string_double_map_;
610
611
public:
612
    // Default constructor: delegates to the options constructor with
    // default-constructed decode options.
    basic_csv_parser()
613
       : basic_csv_parser(basic_csv_decode_options<CharT>())
614
    {
615
    }
616
617
    // Constructs a parser with default decode options and the given allocator.
    // Delegates to the (options, alloc) constructor, which installs
    // default_csv_parsing as the error handler itself. It deliberately does
    // not delegate to the (options, err_handler, alloc) overload: that one is
    // only declared when JSONCONS_NO_DEPRECATED is not defined, so relying on
    // it would make this constructor unusable in builds that define the macro.
    explicit basic_csv_parser(const TempAlloc& alloc)
618
       : basic_csv_parser(basic_csv_decode_options<CharT>(), 
620
                          alloc)
621
    {
622
    }
623
624
    // Constructs a parser from decode options, using a default-constructed
    // allocator; delegates to the (options, alloc) constructor.
    explicit basic_csv_parser(const basic_csv_decode_options<CharT>& options)
625
        : basic_csv_parser(options, TempAlloc{})
626
    {
627
    }
628
629
    // Main constructor. Copies the individual settings out of `options`,
    // installs default_csv_parsing as the error handler, gives every
    // allocator-aware member the supplied allocator, and starts in
    // csv_parse_state::start.
    basic_csv_parser(const basic_csv_decode_options<CharT>& options,
630
        const TempAlloc& alloc)
631
2.52k
       : alloc_(alloc),
632
2.52k
         state_(csv_parse_state::start),
633
2.52k
         err_handler_(default_csv_parsing()),
634
2.52k
         assume_header_(options.assume_header()),                  
635
2.52k
         comment_starter_(options.comment_starter()),
636
2.52k
         field_delimiter_(options.field_delimiter()),
637
2.52k
         header_lines_(options.header_lines()),
638
2.52k
         ignore_empty_values_(options.ignore_empty_values()),
639
2.52k
         ignore_empty_lines_(options.ignore_empty_lines()),
640
2.52k
         infer_types_(options.infer_types()),
641
2.52k
         lossless_number_(options.lossless_number()), 
642
2.52k
         mapping_kind_(options.mapping_kind()),
643
2.52k
         max_lines_(options.max_lines()),
644
2.52k
         quote_char_(options.quote_char()),
645
2.52k
         quote_escape_char_(options.quote_escape_char()),
646
2.52k
         subfield_delimiter_(options.subfield_delimiter()),
647
2.52k
         trim_leading_(options.trim_leading()),
648
2.52k
         trim_leading_inside_quotes_(options.trim_leading_inside_quotes()),
649
2.52k
         trim_trailing_(options.trim_trailing()),
650
2.52k
         trim_trailing_inside_quotes_(options.trim_trailing_inside_quotes()),
651
2.52k
         unquoted_empty_value_is_null_(options.unquoted_empty_value_is_null()),
652
2.52k
         m_columns_filter_(alloc),
653
2.52k
         stack_(alloc),
654
2.52k
         column_names_(alloc),
655
2.52k
         column_types_(alloc),
656
2.52k
         column_defaults_(alloc),
657
2.52k
         state_stack_(alloc),
658
2.52k
         buffer_(alloc)
659
2.52k
    {
660
2.52k
        // Register the enabled special strings with the double each one stands for.
        // NOTE(review): std::nan and std::numeric_limits are used below, but
        // <cmath> and <limits> are not among the #include lines at the top of
        // this file - presumably they arrive transitively; confirm.
        if (options.enable_str_to_nan())
661
0
        {
662
0
            string_double_map_.emplace_back(options.nan_to_str(),std::nan(""));
663
0
        }
664
2.52k
        if (options.enable_str_to_inf())
665
0
        {
666
0
            string_double_map_.emplace_back(options.inf_to_str(),std::numeric_limits<double>::infinity());
667
0
        }
668
2.52k
        if (options.enable_str_to_neginf())
669
0
        {
670
0
            string_double_map_.emplace_back(options.neginf_to_str(),-std::numeric_limits<double>::infinity());
671
0
        }
672
673
2.52k
        jsoncons::csv::detail::parse_column_types(options.column_types(), column_types_);
674
2.52k
        // Column defaults go through the same helper as column names.
        jsoncons::csv::detail::parse_column_names(options.column_defaults(), column_defaults_);
675
2.52k
        jsoncons::csv::detail::parse_column_names(options.column_names(), column_names_);
676
2.52k
        // Remember how many names came from the options; reinitialize()
        // truncates column_names_ back to this count.
        min_column_names_ = column_names_.size();
677
2.52k
        initialize();
678
2.52k
    }
679
680
#if !defined(JSONCONS_NO_DEPRECATED)
681
    // Constructs a parser with default decode options and a caller-supplied
    // error handler. Compiled only when JSONCONS_NO_DEPRECATED is not defined.
    basic_csv_parser(std::function<bool(csv_errc,const ser_context&)> err_handler,
682
                     const TempAlloc& alloc = TempAlloc())
683
        : basic_csv_parser(basic_csv_decode_options<CharT>(), 
684
                           err_handler,
685
                           alloc)
686
    {
687
    }
688
689
    // Same initialization as the (options, alloc) constructor, except that
    // `err_handler` is stored in err_handler_ instead of default_csv_parsing.
    // Compiled only when JSONCONS_NO_DEPRECATED is not defined.
    basic_csv_parser(const basic_csv_decode_options<CharT>& options,
690
                     std::function<bool(csv_errc,const ser_context&)> err_handler,
691
                     const TempAlloc& alloc = TempAlloc())
692
       : alloc_(alloc),
693
         state_(csv_parse_state::start),
694
         err_handler_(err_handler),
695
         assume_header_(options.assume_header()),                  
696
         comment_starter_(options.comment_starter()),
697
         field_delimiter_(options.field_delimiter()),
698
         header_lines_(options.header_lines()),
699
         ignore_empty_values_(options.ignore_empty_values()),
700
         ignore_empty_lines_(options.ignore_empty_lines()),
701
         infer_types_(options.infer_types()),
702
         lossless_number_(options.lossless_number()), 
703
         mapping_kind_(options.mapping_kind()),
704
         max_lines_(options.max_lines()),
705
         quote_char_(options.quote_char()),
706
         quote_escape_char_(options.quote_escape_char()),
707
         subfield_delimiter_(options.subfield_delimiter()),
708
         trim_leading_(options.trim_leading()),
709
         trim_leading_inside_quotes_(options.trim_leading_inside_quotes()),
710
         trim_trailing_(options.trim_trailing()),
711
         trim_trailing_inside_quotes_(options.trim_trailing_inside_quotes()),
712
         unquoted_empty_value_is_null_(options.unquoted_empty_value_is_null()),
713
         m_columns_filter_(alloc),
714
         stack_(alloc),
715
         column_names_(alloc),
716
         column_types_(alloc),
717
         column_defaults_(alloc),
718
         state_stack_(alloc),
719
         buffer_(alloc)
720
    {
721
        // Register the enabled special strings with the double each one stands for.
        if (options.enable_str_to_nan())
722
        {
723
            string_double_map_.emplace_back(options.nan_to_str(),std::nan(""));
724
        }
725
        if (options.enable_str_to_inf())
726
        {
727
            string_double_map_.emplace_back(options.inf_to_str(),std::numeric_limits<double>::infinity());
728
        }
729
        if (options.enable_str_to_neginf())
730
        {
731
            string_double_map_.emplace_back(options.neginf_to_str(),-std::numeric_limits<double>::infinity());
732
        }
733
734
        jsoncons::csv::detail::parse_column_types(options.column_types(), column_types_);
735
        jsoncons::csv::detail::parse_column_names(options.column_defaults(), column_defaults_);
736
        jsoncons::csv::detail::parse_column_names(options.column_names(), column_names_);
737
        // Remember how many names came from the options; reinitialize()
        // truncates column_names_ back to this count.
        min_column_names_ = column_names_.size();
738
        initialize();
739
    }
740
#endif
741
742
    // Destructor with an empty body, declared noexcept. Nothing is released
    // explicitly here; cleanup is left to the destructors of the data members.
    ~basic_csv_parser() noexcept
743
2.52k
    {
744
2.52k
    }
745
746
    void cursor_mode(bool value)
747
    {
748
        actual_cursor_mode_ = value;
749
        cursor_mode_ = (mapping_kind_ == csv_mapping_kind::m_columns) ? false : value;
750
    }
751
752
    // Returns the current value of level_.
    // In the parsing code visible in this file, level_ is incremented when a
    // begin_array event is emitted and decremented after an end_array event,
    // so it reflects how many arrays are currently open.
    int level() const
753
3.14k
    {
754
3.14k
        return level_;
755
3.14k
    }
756
757
    // Returns mark_level_.
    // parse_some compares level() with this value after emitting end_array
    // (for mappings other than m_columns) and clears more_ when they are equal.
    int mark_level() const 
758
    {
759
        return mark_level_;
760
    }
761
762
    // Sets mark_level_ to `value`.
    // parse_some stops (more_ = false) after an end_array event when level()
    // equals this value and the mapping kind is not m_columns.
    void mark_level(int value)
763
    {
764
        mark_level_ = value;
765
    }
766
767
    bool done() const
768
    {
769
        return state_ == csv_parse_state::done;
770
    }
771
772
    bool accept() const
773
    {
774
        return state_ == csv_parse_state::accept || state_ == csv_parse_state::done;
775
    }
776
777
    bool stopped() const
778
18.9k
    {
779
18.9k
        return !more_;
780
18.9k
    }
781
782
    bool source_exhausted() const
783
16.4k
    {
784
16.4k
        return input_ptr_ == input_end_;
785
16.4k
    }
786
787
    // Returns a const reference to column_names_.
    // The constructor fills this vector from options.column_names(); entries
    // beyond the first min_column_names_ are the ones reinitialize() erases
    // (presumably names picked up while parsing — confirm where they are added).
    const std::vector<string_type,string_allocator_type>& column_labels() const
788
    {
789
        return column_names_;
790
    }
791
792
    void reinitialize()
793
    {
794
        state_ = csv_parse_state::start;
795
        column_ = 1;
796
        line_ = 1;
797
        nesting_depth_ = default_depth;
798
        column_index_ = 0;
799
        level_ = 0;
800
        depth_ = 0;
801
        offset_ = 0;
802
        begin_input_ = nullptr;
803
        input_end_ = nullptr;
804
        input_ptr_ = nullptr;
805
        more_ = true;
806
        header_line_ = 1;
807
        m_columns_filter_.reset();
808
        stack_.clear();
809
        column_names_.erase(column_names_.begin() + min_column_names_, column_names_.end());
810
        state_stack_.clear();
811
        buffer_.clear();
812
813
        initialize();
814
    }
815
816
    // Sets more_ back to true so that stopped() returns false and the
    // processing in parse_some, which is guarded by more_, can continue.
    // No other parser state is modified.
    void restart()
817
    {
818
        more_ = true;
819
    }
820
821
    // Convenience overload of parse_some that reports failure by raising
    // instead of returning an error code.
    //
    // Delegates to parse_some(visitor, ec); if that call sets the error code,
    // a ser_error built from the code and the current line_ and column_ is
    // raised through JSONCONS_THROW.
    void parse_some(basic_json_visitor<CharT>& visitor)
    {
        std::error_code failure;
        parse_some(visitor, failure);

        if (JSONCONS_UNLIKELY(failure))
        {
            JSONCONS_THROW(ser_error(failure,line_,column_));
        }
    }
830
831
    void parse_some(basic_json_visitor<CharT>& visitor, std::error_code& ec)
832
16.4k
    {
833
16.4k
        basic_json_visitor<CharT>& local_visitor = (mapping_kind_ == csv_mapping_kind::m_columns) 
834
16.4k
            ? m_columns_filter_ : visitor;
835
836
16.4k
        switch (mapping_kind_)
837
16.4k
        {
838
0
            case csv_mapping_kind::m_columns:
839
0
                cursor_mode_ = false;
840
0
                break;
841
16.4k
            default:
842
16.4k
                break;
843
16.4k
        } 
844
845
16.4k
        const CharT* local_input_end = input_end_;
846
847
16.4k
        if (input_ptr_ == local_input_end && more_)
848
13.9k
        {
849
13.9k
            switch (state_)
850
13.9k
            {
851
1
                case csv_parse_state::start:
852
1
                    ec = csv_errc::source_error;
853
1
                    more_ = false;
854
1
                    return;
855
2.09k
                case csv_parse_state::before_unquoted_field:
856
2.09k
                case csv_parse_state::before_last_unquoted_field:
857
2.09k
                    end_unquoted_string_value(local_visitor, ec);
858
2.09k
                    state_ = csv_parse_state::before_last_unquoted_field_tail;
859
2.09k
                    break;
860
2.09k
                case csv_parse_state::before_last_unquoted_field_tail:
861
2.09k
                    if (stack_.back() == csv_mode::subfields)
862
0
                    {
863
0
                        stack_.pop_back();
864
0
                        local_visitor.end_array(*this, ec);
865
0
                        more_ = !cursor_mode_;
866
0
                        if (mapping_kind_ != csv_mapping_kind::m_columns)
867
0
                        {
868
0
                            if (level() == mark_level_)
869
0
                            {
870
0
                                more_ = false;
871
0
                            }
872
0
                        }
873
0
                        --level_;
874
0
                    }
875
2.09k
                    ++column_index_;
876
2.09k
                    state_ = csv_parse_state::end_record;
877
2.09k
                    break;
878
270
                case csv_parse_state::before_unquoted_string: 
879
270
                    buffer_.clear();
880
270
                    JSONCONS_FALLTHROUGH;
881
2.09k
                case csv_parse_state::unquoted_string: 
882
2.09k
                    if (trim_leading_ || trim_trailing_)
883
0
                    {
884
0
                        trim_string_buffer(trim_leading_,trim_trailing_);
885
0
                    }
886
2.09k
                    if (ignore_empty_values_ && buffer_.empty())
887
0
                    {
888
0
                        state_ = csv_parse_state::end_record;
889
0
                    }
890
2.09k
                    else
891
2.09k
                    {
892
2.09k
                        before_value(local_visitor, ec);
893
2.09k
                        state_ = csv_parse_state::before_unquoted_field;
894
2.09k
                    }
895
2.09k
                    break;
896
31
                case csv_parse_state::before_last_quoted_field:
897
31
                    end_quoted_string_value(local_visitor, ec);
898
31
                    ++column_index_;
899
31
                    state_ = csv_parse_state::end_record;
900
31
                    break;
901
31
                case csv_parse_state::escaped_value:
902
31
                    if (quote_escape_char_ == quote_char_)
903
31
                    {
904
31
                        if (!(ignore_empty_values_ && buffer_.empty()))
905
31
                        {
906
31
                            before_value(local_visitor, ec);
907
31
                            ++column_;
908
31
                            state_ = csv_parse_state::before_last_quoted_field;
909
31
                        }
910
0
                        else
911
0
                        {
912
0
                            state_ = csv_parse_state::end_record;
913
0
                        }
914
31
                    }
915
0
                    else
916
0
                    {
917
0
                        ec = csv_errc::invalid_escaped_char;
918
0
                        more_ = false;
919
0
                        return;
920
0
                    }
921
31
                    break;
922
2.43k
                case csv_parse_state::end_record:
923
2.43k
                    if (column_index_ > 0)
924
2.13k
                    {
925
2.13k
                        end_record(local_visitor, ec);
926
2.13k
                    }
927
2.43k
                    state_ = csv_parse_state::no_more_records;
928
2.43k
                    break;
929
2.43k
                case csv_parse_state::no_more_records: 
930
2.43k
                    switch (stack_.back()) 
931
2.43k
                    {
932
118
                        case csv_mode::header:
933
118
                            stack_.pop_back();
934
118
                            break;
935
2.31k
                        case csv_mode::data:
936
2.31k
                            stack_.pop_back();
937
2.31k
                            break;
938
0
                        default:
939
0
                            break;
940
2.43k
                    }
941
2.43k
                    local_visitor.end_array(*this, ec);
942
2.43k
                    more_ = !cursor_mode_;
943
2.43k
                    if (mapping_kind_ != csv_mapping_kind::m_columns)
944
2.43k
                    {
945
2.43k
                        if (level() == mark_level_)
946
0
                        {
947
0
                            more_ = false;
948
0
                        }
949
2.43k
                    }
950
2.43k
                    --level_;
951
                    
952
2.43k
                    if (mapping_kind_ == csv_mapping_kind::m_columns)
953
0
                    {
954
0
                        if (!m_columns_filter_.done())
955
0
                        {
956
0
                            more_ = m_columns_filter_.replay_parse_events(visitor, actual_cursor_mode_, mark_level_);
957
0
                        }
958
0
                        else
959
0
                        {
960
0
                            state_ = csv_parse_state::accept;
961
0
                        }
962
0
                    }
963
2.43k
                    else
964
2.43k
                    {
965
2.43k
                        state_ = csv_parse_state::accept;
966
2.43k
                    }
967
2.43k
                    break;
968
2.43k
                case csv_parse_state::accept:
969
2.43k
                    if (!(stack_.size() == 1 && stack_.back() == csv_mode::initial))
970
0
                    {
971
0
                        err_handler_(csv_errc::unexpected_eof, *this);
972
0
                        ec = csv_errc::unexpected_eof;
973
0
                        more_ = false;
974
0
                        return;
975
0
                    }
976
2.43k
                    stack_.pop_back();
977
2.43k
                    local_visitor.flush();
978
2.43k
                    state_ = csv_parse_state::done;
979
2.43k
                    more_ = false;
980
2.43k
                    return;
981
313
                default:
982
313
                    state_ = csv_parse_state::end_record;
983
313
                    break;
984
13.9k
            }
985
13.9k
        }
986
987
166M
        for (; (input_ptr_ < local_input_end) && more_;)
988
166M
        {
989
166M
            CharT curr_char = *input_ptr_;
990
991
166M
            switch (state_) 
992
166M
            {
993
4.34M
                case csv_parse_state::cr:
994
4.34M
                    ++line_;
995
4.34M
                    column_ = 1;
996
4.34M
                    switch (*input_ptr_)
997
4.34M
                    {
998
326
                        case '\n':
999
326
                            ++input_ptr_;
1000
326
                            state_ = pop_state();
1001
326
                            break;
1002
4.34M
                        default:
1003
4.34M
                            state_ = pop_state();
1004
4.34M
                            break;
1005
4.34M
                    }
1006
4.34M
                    break;
1007
4.34M
                case csv_parse_state::start:
1008
2.51k
                    if (mapping_kind_ != csv_mapping_kind::m_columns)
1009
2.51k
                    {
1010
2.51k
                        local_visitor.begin_array(semantic_tag::none, *this, ec);
1011
2.51k
                        more_ = !cursor_mode_;
1012
2.51k
                        ++level_;
1013
2.51k
                    }
1014
2.51k
                    if (assume_header_ && mapping_kind_ == csv_mapping_kind::n_rows && !column_names_.empty())
1015
0
                    {
1016
0
                        column_index_ = 0; 
1017
0
                        local_visitor.begin_array(semantic_tag::none, *this, ec);
1018
0
                        ++level_;
1019
0
                        more_ = !cursor_mode_;
1020
0
                        state_ = csv_parse_state::expect_comment_or_record;
1021
0
                    }
1022
2.51k
                    else
1023
2.51k
                    {
1024
2.51k
                        state_ = csv_parse_state::expect_comment_or_record;
1025
2.51k
                    }
1026
2.51k
                    break;
1027
9.87M
                case csv_parse_state::comment: 
1028
9.87M
                    switch (curr_char)
1029
9.87M
                    {
1030
1.81M
                        case '\n':
1031
1.81M
                        {
1032
1.81M
                            ++line_;
1033
1.81M
                            if (stack_.back() == csv_mode::header)
1034
1.66M
                            {
1035
1.66M
                                ++header_line_offset_;
1036
1.66M
                            }
1037
1.81M
                            column_ = 1;
1038
1.81M
                            state_ = csv_parse_state::expect_comment_or_record;
1039
1.81M
                            break;
1040
0
                        }
1041
15.1k
                        case '\r':
1042
15.1k
                            ++line_;
1043
15.1k
                            if (stack_.back() == csv_mode::header)
1044
525
                            {
1045
525
                                ++header_line_offset_;
1046
525
                            }
1047
15.1k
                            column_ = 1;
1048
15.1k
                            state_ = csv_parse_state::expect_comment_or_record;
1049
15.1k
                            push_state(state_);
1050
15.1k
                            state_ = csv_parse_state::cr;
1051
15.1k
                            break;
1052
8.03M
                        default:
1053
8.03M
                            ++column_;
1054
8.03M
                            break;
1055
9.87M
                    }
1056
9.87M
                    ++input_ptr_;
1057
9.87M
                    break;
1058
                
1059
5.27M
                case csv_parse_state::expect_comment_or_record:
1060
5.27M
                    buffer_.clear();
1061
5.27M
                    if (curr_char == comment_starter_)
1062
1.83M
                    {
1063
1.83M
                        state_ = csv_parse_state::comment;
1064
1.83M
                        ++column_;
1065
1.83M
                        ++input_ptr_;
1066
1.83M
                    }
1067
3.43M
                    else
1068
3.43M
                    {
1069
3.43M
                        state_ = csv_parse_state::expect_record;
1070
3.43M
                    }
1071
5.27M
                    break;
1072
2.02M
                case csv_parse_state::quoted_string: 
1073
2.02M
                    {
1074
2.02M
                        if (curr_char == quote_escape_char_)
1075
2.47k
                        {
1076
2.47k
                            state_ = csv_parse_state::escaped_value;
1077
2.47k
                        }
1078
2.02M
                        else if (curr_char == quote_char_)
1079
0
                        {
1080
0
                            state_ = csv_parse_state::between_values;
1081
0
                        }
1082
2.02M
                        else
1083
2.02M
                        {
1084
2.02M
                            buffer_.push_back(static_cast<CharT>(curr_char));
1085
2.02M
                        }
1086
2.02M
                    }
1087
2.02M
                    ++column_;
1088
2.02M
                    ++input_ptr_;
1089
2.02M
                    break;
1090
2.44k
                case csv_parse_state::escaped_value: 
1091
2.44k
                    {
1092
2.44k
                        if (curr_char == quote_char_)
1093
417
                        {
1094
417
                            buffer_.push_back(static_cast<CharT>(curr_char));
1095
417
                            state_ = csv_parse_state::quoted_string;
1096
417
                            ++column_;
1097
417
                            ++input_ptr_;
1098
417
                        }
1099
2.03k
                        else if (quote_escape_char_ == quote_char_)
1100
2.03k
                        {
1101
2.03k
                            state_ = csv_parse_state::between_values;
1102
2.03k
                        }
1103
0
                        else
1104
0
                        {
1105
0
                            ec = csv_errc::invalid_escaped_char;
1106
0
                            more_ = false;
1107
0
                            return;
1108
0
                        }
1109
2.44k
                    }
1110
2.44k
                    break;
1111
2.54k
                case csv_parse_state::between_values:
1112
2.54k
                    switch (curr_char)
1113
2.54k
                    {
1114
272
                        case '\r':
1115
1.08k
                        case '\n':
1116
1.08k
                        {
1117
1.08k
                            if (trim_leading_ || trim_trailing_)
1118
0
                            {
1119
0
                                trim_string_buffer(trim_leading_,trim_trailing_);
1120
0
                            }
1121
1.08k
                            if (!(ignore_empty_values_ && buffer_.empty()))
1122
1.08k
                            {
1123
1.08k
                                before_value(local_visitor, ec);
1124
1.08k
                                state_ = csv_parse_state::before_last_quoted_field;
1125
1.08k
                            }
1126
0
                            else
1127
0
                            {
1128
0
                                state_ = csv_parse_state::end_record;
1129
0
                            }
1130
1.08k
                            break;
1131
272
                        }
1132
1.46k
                        default:
1133
1.46k
                            if (curr_char == field_delimiter_)
1134
857
                            {
1135
857
                                if (trim_leading_ || trim_trailing_)
1136
0
                                {
1137
0
                                    trim_string_buffer(trim_leading_,trim_trailing_);
1138
0
                                }
1139
857
                                before_value(local_visitor, ec);
1140
857
                                state_ = csv_parse_state::before_quoted_field;
1141
857
                            }
1142
607
                            else if (subfield_delimiter_ != char_type() && curr_char == subfield_delimiter_)
1143
0
                            {
1144
0
                                if (trim_leading_ || trim_trailing_)
1145
0
                                {
1146
0
                                    trim_string_buffer(trim_leading_,trim_trailing_);
1147
0
                                }
1148
0
                                before_value(local_visitor, ec);
1149
0
                                state_ = csv_parse_state::before_quoted_subfield;
1150
0
                            }
1151
607
                            else if (curr_char == ' ' || curr_char == '\t')
1152
529
                            {
1153
529
                                ++column_;
1154
529
                                ++input_ptr_;
1155
529
                            }
1156
78
                            else
1157
78
                            {
1158
78
                                ec = csv_errc::unexpected_char_between_fields;
1159
78
                                more_ = false;
1160
78
                                return;
1161
78
                            }
1162
1.38k
                            break;
1163
2.54k
                    }
1164
2.46k
                    break;
1165
23.9M
                case csv_parse_state::before_unquoted_string: 
1166
23.9M
                {
1167
23.9M
                    buffer_.clear();
1168
23.9M
                    state_ = csv_parse_state::unquoted_string;
1169
23.9M
                    break;
1170
2.54k
                }
1171
23.9M
                case csv_parse_state::before_unquoted_field:
1172
23.9M
                    end_unquoted_string_value(local_visitor, ec);
1173
23.9M
                    state_ = csv_parse_state::before_unquoted_field_tail;
1174
23.9M
                    break;
1175
23.9M
                case csv_parse_state::before_unquoted_field_tail:
1176
23.9M
                {
1177
23.9M
                    if (stack_.back() == csv_mode::subfields)
1178
0
                    {
1179
0
                        stack_.pop_back();
1180
0
                        local_visitor.end_array(*this, ec);
1181
0
                        more_ = !cursor_mode_;
1182
0
                        if (mapping_kind_ != csv_mapping_kind::m_columns)
1183
0
                        {
1184
0
                            if (level() == mark_level_)
1185
0
                            {
1186
0
                                more_ = false;
1187
0
                            }
1188
0
                        }
1189
0
                        --level_;
1190
0
                    }
1191
23.9M
                    ++column_index_;
1192
23.9M
                    state_ = csv_parse_state::before_unquoted_string;
1193
23.9M
                    ++column_;
1194
23.9M
                    ++input_ptr_;
1195
23.9M
                    break;
1196
2.54k
                }
1197
0
                case csv_parse_state::before_unquoted_field_tail1:
1198
0
                {
1199
0
                    if (stack_.back() == csv_mode::subfields)
1200
0
                    {
1201
0
                        stack_.pop_back();
1202
0
                        local_visitor.end_array(*this, ec);
1203
0
                        more_ = !cursor_mode_;
1204
0
                        if (mapping_kind_ != csv_mapping_kind::m_columns)
1205
0
                        {
1206
0
                            if (level() == mark_level_)
1207
0
                            {
1208
0
                                more_ = false;
1209
0
                            }
1210
0
                        }
1211
0
                        --level_;
1212
0
                    }
1213
0
                    state_ = csv_parse_state::end_record;
1214
0
                    ++column_;
1215
0
                    ++input_ptr_;
1216
0
                    break;
1217
2.54k
                }
1218
1219
2.38M
                case csv_parse_state::before_last_unquoted_field:
1220
2.38M
                    end_unquoted_string_value(local_visitor, ec);
1221
2.38M
                    state_ = csv_parse_state::before_last_unquoted_field_tail;
1222
2.38M
                    break;
1223
1224
2.38M
                case csv_parse_state::before_last_unquoted_field_tail:
1225
2.38M
                    if (stack_.back() == csv_mode::subfields)
1226
0
                    {
1227
0
                        stack_.pop_back();
1228
0
                        local_visitor.end_array(*this, ec);
1229
0
                        more_ = !cursor_mode_;
1230
0
                        if (mapping_kind_ != csv_mapping_kind::m_columns)
1231
0
                        {
1232
0
                            if (level() == mark_level_)
1233
0
                            {
1234
0
                                more_ = false;
1235
0
                            }
1236
0
                        }
1237
0
                        --level_;
1238
0
                    }
1239
2.38M
                    ++column_index_;
1240
2.38M
                    state_ = csv_parse_state::end_record;
1241
2.38M
                    break;
1242
1243
0
                case csv_parse_state::before_unquoted_subfield:
1244
0
                    if (stack_.back() == csv_mode::data)
1245
0
                    {
1246
0
                        stack_.push_back(csv_mode::subfields);
1247
0
                        local_visitor.begin_array(semantic_tag::none, *this, ec);
1248
0
                        more_ = !cursor_mode_;
1249
0
                        ++level_;
1250
0
                    }
1251
0
                    state_ = csv_parse_state::before_unquoted_subfield_tail;
1252
0
                    break; 
1253
0
                case csv_parse_state::before_unquoted_subfield_tail:
1254
0
                    end_unquoted_string_value(local_visitor, ec);
1255
0
                    state_ = csv_parse_state::before_unquoted_string;
1256
0
                    ++column_;
1257
0
                    ++input_ptr_;
1258
0
                    break;
1259
857
                case csv_parse_state::before_quoted_field:
1260
857
                    end_quoted_string_value(local_visitor, ec);
1261
857
                    state_ = csv_parse_state::before_unquoted_field_tail; // return to unquoted
1262
857
                    break;
1263
0
                case csv_parse_state::before_quoted_subfield:
1264
0
                    if (stack_.back() == csv_mode::data)
1265
0
                    {
1266
0
                        stack_.push_back(csv_mode::subfields);
1267
0
                        local_visitor.begin_array(semantic_tag::none, *this, ec);
1268
0
                        more_ = !cursor_mode_;
1269
0
                        ++level_;
1270
0
                    }
1271
0
                    state_ = csv_parse_state::before_quoted_subfield_tail;
1272
0
                    break; 
1273
0
                case csv_parse_state::before_quoted_subfield_tail:
1274
0
                    end_quoted_string_value(local_visitor, ec);
1275
0
                    state_ = csv_parse_state::before_unquoted_string;
1276
0
                    ++column_;
1277
0
                    ++input_ptr_;
1278
0
                    break;
1279
1.08k
                case csv_parse_state::before_last_quoted_field:
1280
1.08k
                    end_quoted_string_value(local_visitor, ec);
1281
1.08k
                    state_ = csv_parse_state::before_last_quoted_field_tail;
1282
1.08k
                    break;
1283
1.08k
                case csv_parse_state::before_last_quoted_field_tail:
1284
1.08k
                    if (stack_.back() == csv_mode::subfields)
1285
0
                    {
1286
0
                        stack_.pop_back();
1287
0
                        local_visitor.end_array(*this, ec);
1288
0
                        more_ = !cursor_mode_;
1289
0
                        if (mapping_kind_ != csv_mapping_kind::m_columns)
1290
0
                        {
1291
0
                            if (level() == mark_level_)
1292
0
                            {
1293
0
                                more_ = false;
1294
0
                            }
1295
0
                        }
1296
0
                        --level_;
1297
0
                    }
1298
1.08k
                    ++column_index_;
1299
1.08k
                    state_ = csv_parse_state::end_record;
1300
1.08k
                    break;
1301
60.4M
                case csv_parse_state::unquoted_string: 
1302
60.4M
                {
1303
60.4M
                    switch (curr_char)
1304
60.4M
                    {
1305
689k
                        case '\n':
1306
2.38M
                        case '\r':
1307
2.38M
                        {
1308
2.38M
                            if (trim_leading_ || trim_trailing_)
1309
0
                            {
1310
0
                                trim_string_buffer(trim_leading_,trim_trailing_);
1311
0
                            }
1312
2.38M
                            if (!(ignore_empty_values_ && buffer_.empty()))
1313
2.38M
                            {
1314
2.38M
                                before_value(local_visitor, ec);
1315
2.38M
                                state_ = csv_parse_state::before_last_unquoted_field;
1316
2.38M
                            }
1317
0
                            else
1318
0
                            {
1319
0
                                state_ = csv_parse_state::end_record;
1320
0
                            }
1321
2.38M
                            break;
1322
689k
                        }
1323
58.0M
                        default:
1324
58.0M
                            if (curr_char == field_delimiter_)
1325
23.9M
                            {
1326
23.9M
                                if (trim_leading_ || trim_trailing_)
1327
0
                                {
1328
0
                                    trim_string_buffer(trim_leading_,trim_trailing_);
1329
0
                                }
1330
23.9M
                                before_value(local_visitor, ec);
1331
23.9M
                                state_ = csv_parse_state::before_unquoted_field;
1332
23.9M
                            }
1333
34.0M
                            else if (subfield_delimiter_ != char_type() && curr_char == subfield_delimiter_)
1334
0
                            {
1335
0
                                if (trim_leading_ || trim_trailing_)
1336
0
                                {
1337
0
                                    trim_string_buffer(trim_leading_,trim_trailing_);
1338
0
                                }
1339
0
                                before_value(local_visitor, ec);
1340
0
                                state_ = csv_parse_state::before_unquoted_subfield;
1341
0
                            }
1342
34.0M
                            else if (curr_char == quote_char_)
1343
907
                            {
1344
907
                                buffer_.clear();
1345
907
                                state_ = csv_parse_state::quoted_string;
1346
907
                                ++column_;
1347
907
                                ++input_ptr_;
1348
907
                            }
1349
34.0M
                            else
1350
34.0M
                            {
1351
34.0M
                                buffer_.push_back(static_cast<CharT>(curr_char));
1352
34.0M
                                ++column_;
1353
34.0M
                                ++input_ptr_;
1354
34.0M
                            }
1355
58.0M
                            break;
1356
60.4M
                    }
1357
60.4M
                    break;
1358
60.4M
                }
1359
60.4M
                case csv_parse_state::expect_record: 
1360
6.07M
                {
1361
6.07M
                    switch (curr_char)
1362
6.07M
                    {
1363
1.05M
                        case '\n':
1364
1.05M
                        {
1365
1.05M
                            if (!ignore_empty_lines_)
1366
0
                            {
1367
0
                                begin_record(local_visitor, ec);
1368
0
                                state_ = csv_parse_state::end_record;
1369
0
                            }
1370
1.05M
                            else
1371
1.05M
                            {
1372
1.05M
                                ++line_;
1373
1.05M
                                column_ = 1;
1374
1.05M
                                state_ = csv_parse_state::expect_comment_or_record;
1375
1.05M
                                ++input_ptr_;
1376
1.05M
                            }
1377
1.05M
                            break;
1378
0
                        }
1379
2.64M
                        case '\r':
1380
2.64M
                            if (!ignore_empty_lines_)
1381
0
                            {
1382
0
                                begin_record(local_visitor, ec);
1383
0
                                state_ = csv_parse_state::end_record;
1384
0
                            }
1385
2.64M
                            else
1386
2.64M
                            {
1387
2.64M
                                ++input_ptr_;
1388
2.64M
                                push_state(state_);
1389
2.64M
                                state_ = csv_parse_state::cr;
1390
2.64M
                            }
1391
2.64M
                            break;
1392
700
                        case ' ':
1393
45.4k
                        case '\t':
1394
45.4k
                            if (!trim_leading_)
1395
45.4k
                            {
1396
45.4k
                                buffer_.push_back(static_cast<CharT>(curr_char));
1397
45.4k
                                begin_record(local_visitor, ec);
1398
45.4k
                                state_ = csv_parse_state::unquoted_string;
1399
45.4k
                            }
1400
45.4k
                            ++column_;
1401
45.4k
                            ++input_ptr_;
1402
45.4k
                            break;
1403
2.34M
                        default:
1404
2.34M
                            begin_record(local_visitor, ec);
1405
2.34M
                            if (curr_char == quote_char_)
1406
1.24k
                            {
1407
1.24k
                                buffer_.clear();
1408
1.24k
                                state_ = csv_parse_state::quoted_string;
1409
1.24k
                                ++column_;
1410
1.24k
                                ++input_ptr_;
1411
1.24k
                            }
1412
2.33M
                            else
1413
2.33M
                            {
1414
2.33M
                                state_ = csv_parse_state::unquoted_string;
1415
2.33M
                            }
1416
2.34M
                            break;
1417
6.07M
                        }
1418
6.07M
                    break;
1419
6.07M
                    }
1420
6.07M
                case csv_parse_state::end_record: 
1421
2.38M
                {
1422
2.38M
                    switch (curr_char)
1423
2.38M
                    {
1424
690k
                        case '\n':
1425
690k
                        {
1426
690k
                            ++line_;
1427
690k
                            column_ = 1;
1428
690k
                            state_ = csv_parse_state::expect_comment_or_record;
1429
690k
                            end_record(local_visitor, ec);
1430
690k
                            ++input_ptr_;
1431
690k
                            break;
1432
0
                        }
1433
1.69M
                        case '\r':
1434
1.69M
                            ++line_;
1435
1.69M
                            column_ = 1;
1436
1.69M
                            state_ = csv_parse_state::expect_comment_or_record;
1437
1.69M
                            end_record(local_visitor, ec);
1438
1.69M
                            push_state(state_);
1439
1.69M
                            state_ = csv_parse_state::cr;
1440
1.69M
                            ++input_ptr_;
1441
1.69M
                            break;
1442
0
                        case ' ':
1443
0
                        case '\t':
1444
0
                            ++column_;
1445
0
                            ++input_ptr_;
1446
0
                            break;
1447
0
                        default:
1448
0
                            err_handler_(csv_errc::syntax_error, *this);
1449
0
                            ec = csv_errc::syntax_error;
1450
0
                            more_ = false;
1451
0
                            return;
1452
2.38M
                        }
1453
2.38M
                    break;
1454
2.38M
                }
1455
2.38M
                default:
1456
0
                    err_handler_(csv_errc::invalid_parse_state, *this);
1457
0
                    ec = csv_errc::invalid_parse_state;
1458
0
                    more_ = false;
1459
0
                    return;
1460
166M
            }
1461
166M
            if (line_ > max_lines_)
1462
0
            {
1463
0
                state_ = csv_parse_state::done;
1464
0
                more_ = false;
1465
0
            }
1466
166M
        }
1467
14.0k
    }
1468
1469
    void finish_parse()
1470
    {
1471
        std::error_code ec;
1472
        finish_parse(ec);
1473
        if (JSONCONS_UNLIKELY(ec))
1474
        {
1475
            JSONCONS_THROW(ser_error(ec,line_,column_));
1476
        }
1477
    }
1478
1479
    void finish_parse(std::error_code& ec)
1480
    {
1481
        while (more_)
1482
        {
1483
            parse_some(ec);
1484
        }
1485
    }
1486
1487
    // Returns the state the parser state machine is currently in.
    csv_parse_state state() const
    {
        return state_;
    }
1491
1492
    void update(const string_view_type sv)
1493
    {
1494
        update(sv.data(),sv.length());
1495
    }
1496
1497
    void update(const CharT* data, std::size_t length)
1498
2.51k
    {
1499
2.51k
        begin_input_ = data;
1500
2.51k
        input_end_ = data + length;
1501
2.51k
        input_ptr_ = begin_input_;
1502
2.51k
    }
1503
1504
    std::size_t line() const override
1505
91
    {
1506
91
        return line_;
1507
91
    }
1508
1509
    std::size_t column() const override
1510
91
    {
1511
91
        return column_;
1512
91
    }
1513
1514
private:
1515
    void initialize()
1516
2.52k
    {
1517
2.52k
        stack_.reserve(default_depth);
1518
2.52k
        stack_.push_back(csv_mode::initial);
1519
2.52k
        stack_.push_back((header_lines_ > 0) ? csv_mode::header : csv_mode::data);
1520
2.52k
    }
1521
1522
    // name
1523
    void before_value(basic_json_visitor<CharT>& visitor, 
1524
        std::error_code& ec)
1525
26.3M
    {
1526
26.3M
        switch (stack_.back())
1527
26.3M
        {
1528
11.0M
            case csv_mode::header:
1529
11.0M
                if (trim_leading_inside_quotes_ || trim_trailing_inside_quotes_)
1530
0
                {
1531
0
                    trim_string_buffer(trim_leading_inside_quotes_,trim_trailing_inside_quotes_);
1532
0
                }
1533
11.0M
                if (line_ == (header_line_+header_line_offset_) && column_index_ >= min_column_names_)
1534
11.0M
                {
1535
11.0M
                    column_names_.push_back(buffer_);
1536
11.0M
                    if (assume_header_ && mapping_kind_ == csv_mapping_kind::n_rows)
1537
11.0M
                    {
1538
11.0M
                        visitor.string_value(buffer_, semantic_tag::none, *this, ec);
1539
11.0M
                        more_ = !cursor_mode_;
1540
11.0M
                    }
1541
11.0M
                }
1542
11.0M
                break;
1543
15.2M
            case csv_mode::data:
1544
15.2M
                if (mapping_kind_ == csv_mapping_kind::n_objects)
1545
0
                {
1546
0
                    if (!(ignore_empty_values_ && buffer_.empty()))
1547
0
                    {
1548
0
                        if (column_index_ < column_names_.size() + offset_)
1549
0
                        {
1550
0
                            visitor.key(column_names_[column_index_ - offset_], *this, ec);
1551
0
                            more_ = !cursor_mode_;
1552
0
                        }
1553
0
                    }
1554
0
                }
1555
15.2M
                break;
1556
0
            default:
1557
0
                break;
1558
26.3M
        }
1559
26.3M
    }
1560
1561
    // begin_array or begin_record
1562
    void begin_record(basic_json_visitor<CharT>& visitor, std::error_code& ec)
1563
2.38M
    {
1564
2.38M
        offset_ = 0;
1565
1566
2.38M
        if (stack_.back() == csv_mode::header && line_ > (header_lines_+header_line_offset_))
1567
1.63k
        {
1568
1.63k
            stack_.back() = csv_mode::data;
1569
1.63k
        }
1570
2.38M
        switch (stack_.back())
1571
2.38M
        {
1572
812
            case csv_mode::header:
1573
812
                switch (mapping_kind_)
1574
812
                {
1575
812
                    case csv_mapping_kind::n_rows:
1576
812
                        if (assume_header_ && line_ == (header_line_+header_line_offset_))
1577
812
                        {
1578
812
                            visitor.begin_array(semantic_tag::none, *this, ec);
1579
812
                            more_ = !cursor_mode_;
1580
812
                            ++level_;
1581
812
                        }
1582
812
                        break;
1583
0
                    default:
1584
0
                        break;
1585
812
                }
1586
812
                break;
1587
2.38M
            case csv_mode::data:
1588
2.38M
                switch (mapping_kind_)
1589
2.38M
                {
1590
2.38M
                    case csv_mapping_kind::n_rows:
1591
2.38M
                        visitor.begin_array(semantic_tag::none, *this, ec);
1592
2.38M
                        more_ = !cursor_mode_;
1593
2.38M
                        ++level_;
1594
2.38M
                        break;
1595
0
                    case csv_mapping_kind::n_objects:
1596
0
                        visitor.begin_object(semantic_tag::none, *this, ec);
1597
0
                        more_ = !cursor_mode_;
1598
0
                        ++level_;
1599
0
                        break;
1600
0
                    case csv_mapping_kind::m_columns:
1601
0
                        break;
1602
0
                    default:
1603
0
                        break;
1604
2.38M
                }
1605
2.38M
                break;
1606
2.38M
            default:
1607
0
                break;
1608
2.38M
        }
1609
2.38M
    }
1610
1611
    // end_array, begin_array, string_value (headers)
1612
    void end_record(basic_json_visitor<CharT>& visitor, std::error_code& ec)
1613
2.38M
    {
1614
2.38M
        if (!column_types_.empty())
1615
0
        {
1616
0
            switch (mapping_kind_)
1617
0
            {
1618
0
                case csv_mapping_kind::n_rows:
1619
0
                case csv_mapping_kind::n_objects:
1620
0
                    if (depth_ > 0)
1621
0
                    {
1622
0
                        visitor.end_array(*this, ec);
1623
0
                        more_ = !cursor_mode_;
1624
0
                        if (level_ == mark_level_)
1625
0
                        {
1626
0
                            more_ = false;
1627
0
                        }
1628
0
                        --level_;
1629
0
                        depth_ = 0;
1630
0
                    }
1631
0
                    break;
1632
0
                case csv_mapping_kind::m_columns:
1633
0
                    if (depth_ > 0)
1634
0
                    {
1635
0
                        visitor.end_array(*this, ec);
1636
0
                        more_ = !cursor_mode_;
1637
0
                        --level_;
1638
0
                        depth_ = 0;
1639
0
                    }
1640
0
                    break;
1641
0
                default:
1642
0
                    break;
1643
0
            }
1644
0
        }
1645
2.38M
        switch (stack_.back())
1646
2.38M
        {
1647
713
            case csv_mode::header:
1648
713
                if (line_ >= header_lines_)
1649
713
                {
1650
713
                    stack_.back() = csv_mode::data;
1651
713
                }
1652
713
                switch (mapping_kind_)
1653
713
                {
1654
713
                    case csv_mapping_kind::n_rows:
1655
713
                        if (assume_header_)
1656
713
                        {
1657
713
                            visitor.end_array(*this, ec);
1658
713
                            more_ = !cursor_mode_;
1659
713
                            if (level() == mark_level_)
1660
0
                            {
1661
0
                                more_ = false;
1662
0
                            }
1663
713
                            --level_;
1664
713
                        }
1665
713
                        break;
1666
0
                    case csv_mapping_kind::m_columns:
1667
0
                        m_columns_filter_.initialize(column_names_);
1668
0
                        break;
1669
0
                    default:
1670
0
                        break;
1671
713
                }
1672
713
                break;
1673
2.38M
            case csv_mode::data:
1674
2.38M
            case csv_mode::subfields:
1675
2.38M
            {
1676
2.38M
                switch (mapping_kind_)
1677
2.38M
                {
1678
2.38M
                    case csv_mapping_kind::n_rows:
1679
2.38M
                        visitor.end_array(*this, ec);
1680
2.38M
                        more_ = !cursor_mode_;
1681
2.38M
                        if (level_ == mark_level_)
1682
0
                        {
1683
0
                            more_ = false;
1684
0
                        }
1685
2.38M
                        --level_;
1686
2.38M
                        break;
1687
0
                    case csv_mapping_kind::n_objects:
1688
0
                        visitor.end_object(*this, ec);
1689
0
                        more_ = !cursor_mode_;
1690
0
                        if (level_ == mark_level_)
1691
0
                        {
1692
0
                            more_ = false;
1693
0
                        }
1694
0
                        --level_;
1695
0
                        break;
1696
0
                    case csv_mapping_kind::m_columns:
1697
0
                        visitor.end_array(*this, ec);
1698
0
                        more_ = !cursor_mode_;
1699
0
                        --level_;
1700
0
                        break;
1701
2.38M
                }
1702
2.38M
                break;
1703
2.38M
            }
1704
2.38M
            default:
1705
0
                break;
1706
2.38M
        }
1707
2.38M
        column_index_ = 0;
1708
2.38M
    }
1709
1710
    void trim_string_buffer(bool trim_leading, bool trim_trailing)
1711
0
    {
1712
0
        std::size_t start = 0;
1713
0
        std::size_t length = buffer_.length();
1714
0
        if (trim_leading)
1715
0
        {
1716
0
            bool done = false;
1717
0
            while (!done && start < buffer_.length())
1718
0
            {
1719
0
                if ((buffer_[start] < 256) && std::isspace(buffer_[start]))
1720
0
                {
1721
0
                    ++start;
1722
0
                }
1723
0
                else
1724
0
                {
1725
0
                    done = true;
1726
0
                }
1727
0
            }
1728
0
        }
1729
0
        if (trim_trailing)
1730
0
        {
1731
0
            bool done = false;
1732
0
            while (!done && length > 0)
1733
0
            {
1734
0
                if ((buffer_[length-1] < 256) && std::isspace(buffer_[length-1]))
1735
0
                {
1736
0
                    --length;
1737
0
                }
1738
0
                else
1739
0
                {
1740
0
                    done = true;
1741
0
                }
1742
0
            }
1743
0
        }
1744
0
        if (start != 0 || length != buffer_.size())
1745
0
        {
1746
            // Do not use buffer_.substr(...), as this won't preserve the allocator state.
1747
0
            buffer_.resize(length);
1748
0
            buffer_.erase(0, start);
1749
0
        }
1750
0
    }
1751
1752
    /*
1753
        end_array, begin_array, xxx_value (end_value)
1754
    */
1755
    void end_unquoted_string_value(basic_json_visitor<CharT>& visitor, std::error_code& ec) 
1756
26.3M
    {
1757
26.3M
        switch (stack_.back())
1758
26.3M
        {
1759
15.2M
            case csv_mode::data:
1760
15.2M
            case csv_mode::subfields:
1761
15.2M
                switch (mapping_kind_)
1762
15.2M
                {
1763
15.2M
                case csv_mapping_kind::n_rows:
1764
15.2M
                    if (unquoted_empty_value_is_null_ && buffer_.empty())
1765
0
                    {
1766
0
                        visitor.null_value(semantic_tag::none, *this, ec);
1767
0
                        more_ = !cursor_mode_;
1768
0
                    }
1769
15.2M
                    else
1770
15.2M
                    {
1771
15.2M
                        end_value(visitor, infer_types_, ec);
1772
15.2M
                    }
1773
15.2M
                    break;
1774
0
                case csv_mapping_kind::n_objects:
1775
0
                    if (!(ignore_empty_values_ && buffer_.empty()))
1776
0
                    {
1777
0
                        if (column_index_ < column_names_.size() + offset_)
1778
0
                        {
1779
0
                            if (unquoted_empty_value_is_null_ && buffer_.empty())
1780
0
                            {
1781
0
                                visitor.null_value(semantic_tag::none, *this, ec);
1782
0
                                more_ = !cursor_mode_;
1783
0
                            }
1784
0
                            else
1785
0
                            {
1786
0
                                end_value(visitor, infer_types_, ec);
1787
0
                            }
1788
0
                        }
1789
0
                        else if (depth_ > 0)
1790
0
                        {
1791
0
                            if (unquoted_empty_value_is_null_ && buffer_.empty())
1792
0
                            {
1793
0
                                visitor.null_value(semantic_tag::none, *this, ec);
1794
0
                                more_ = !cursor_mode_;
1795
0
                            }
1796
0
                            else
1797
0
                            {
1798
0
                                end_value(visitor, infer_types_, ec);
1799
0
                            }
1800
0
                        }
1801
0
                    }
1802
0
                    break;
1803
0
                case csv_mapping_kind::m_columns:
1804
0
                    if (!(ignore_empty_values_ && buffer_.empty()))
1805
0
                    {
1806
0
                        end_value(visitor, infer_types_, ec);
1807
0
                    }
1808
0
                    else
1809
0
                    {
1810
0
                        m_columns_filter_.skip_column();
1811
0
                    }
1812
0
                    break;
1813
15.2M
                }
1814
15.2M
                break;
1815
15.2M
            default:
1816
11.0M
                break;
1817
26.3M
        }
1818
26.3M
    }
1819
1820
    void end_quoted_string_value(basic_json_visitor<CharT>& visitor, 
1821
        std::error_code& ec) 
1822
1.96k
    {
1823
1.96k
        switch (stack_.back())
1824
1.96k
        {
1825
1.17k
            case csv_mode::data:
1826
1.17k
            case csv_mode::subfields:
1827
1.17k
                if (trim_leading_inside_quotes_ || trim_trailing_inside_quotes_)
1828
0
                {
1829
0
                    trim_string_buffer(trim_leading_inside_quotes_,trim_trailing_inside_quotes_);
1830
0
                }
1831
1.17k
                switch (mapping_kind_)
1832
1.17k
                {
1833
1.17k
                case csv_mapping_kind::n_rows:
1834
1.17k
                    end_value(visitor, false, ec);
1835
1.17k
                    break;
1836
0
                case csv_mapping_kind::n_objects:
1837
0
                    if (!(ignore_empty_values_ && buffer_.empty()))
1838
0
                    {
1839
0
                        if (column_index_ < column_names_.size() + offset_)
1840
0
                        {
1841
0
                            if (unquoted_empty_value_is_null_ && buffer_.empty())
1842
0
                            {
1843
0
                                visitor.null_value(semantic_tag::none, *this, ec);
1844
0
                                more_ = !cursor_mode_;
1845
0
                            }
1846
0
                            else 
1847
0
                            {
1848
0
                                end_value(visitor, false, ec);
1849
0
                            }
1850
0
                        }
1851
0
                        else if (depth_ > 0)
1852
0
                        {
1853
0
                            if (unquoted_empty_value_is_null_ && buffer_.empty())
1854
0
                            {
1855
0
                                visitor.null_value(semantic_tag::none, *this, ec);
1856
0
                                more_ = !cursor_mode_;
1857
0
                            }
1858
0
                            else
1859
0
                            {
1860
0
                                end_value(visitor, false, ec);
1861
0
                            }
1862
0
                        }
1863
0
                    }
1864
0
                    break;
1865
0
                case csv_mapping_kind::m_columns:
1866
0
                    if (!(ignore_empty_values_ && buffer_.empty()))
1867
0
                    {
1868
0
                        end_value(visitor, false, ec);
1869
0
                    }
1870
0
                    else
1871
0
                    {
1872
0
                        m_columns_filter_.skip_column();
1873
0
                    }
1874
0
                    break;
1875
1.17k
                }
1876
1.17k
                break;
1877
1.17k
            default:
1878
794
                break;
1879
1.96k
        }
1880
1.96k
    }
1881
1882
    void end_value(basic_json_visitor<CharT>& visitor, 
1883
        bool infer_types, std::error_code&  ec)
1884
15.2M
    {
1885
15.2M
        auto it = std::find_if(string_double_map_.begin(), string_double_map_.end(), string_maps_to_double{ buffer_ });
1886
15.2M
        if (it != string_double_map_.end())
1887
0
        {
1888
0
            visitor.double_value((*it).second, semantic_tag::none, *this, ec);
1889
0
            more_ = !cursor_mode_;
1890
0
        }
1891
15.2M
        else if (column_index_ < column_types_.size() + offset_)
1892
0
        {
1893
0
            if (column_types_[column_index_ - offset_].col_type == csv_column_type::repeat_t)
1894
0
            {
1895
0
                offset_ = offset_ + column_types_[column_index_ - offset_].rep_count;
1896
0
                if (column_index_ - offset_ + 1 < column_types_.size())
1897
0
                {
1898
0
                    if (column_index_ == offset_ || depth_ > column_types_[column_index_-offset_].level)
1899
0
                    {
1900
0
                        visitor.end_array(*this, ec);
1901
0
                        more_ = !cursor_mode_;
1902
0
                        if (mapping_kind_ != csv_mapping_kind::m_columns)
1903
0
                        {
1904
0
                            if (level() == mark_level_)
1905
0
                            {
1906
0
                                more_ = false;
1907
0
                            }
1908
0
                        }
1909
0
                        --level_;
1910
0
                    }
1911
0
                    depth_ = column_index_ == offset_ ? 0 : column_types_[column_index_ - offset_].level;
1912
0
                }
1913
0
            }
1914
0
            if (depth_ < column_types_[column_index_ - offset_].level)
1915
0
            {
1916
0
                visitor.begin_array(semantic_tag::none, *this, ec);
1917
0
                more_ = !cursor_mode_;
1918
0
                depth_ = column_types_[column_index_ - offset_].level;
1919
0
                ++level_;
1920
0
            }
1921
0
            else if (depth_ > column_types_[column_index_ - offset_].level)
1922
0
            {
1923
0
                visitor.end_array(*this, ec);
1924
0
                more_ = !cursor_mode_;
1925
0
                if (mapping_kind_ != csv_mapping_kind::m_columns)
1926
0
                {
1927
0
                    if (level() == mark_level_)
1928
0
                    {
1929
0
                        more_ = false;
1930
0
                    }
1931
0
                }
1932
0
                --level_;
1933
0
                depth_ = column_types_[column_index_ - offset_].level;
1934
0
            }
1935
0
            switch (column_types_[column_index_ - offset_].col_type)
1936
0
            {
1937
0
                case csv_column_type::integer_t:
1938
0
                    {
1939
0
                        std::basic_istringstream<CharT,std::char_traits<CharT>,char_allocator_type> iss{buffer_};
1940
0
                        int64_t val;
1941
0
                        iss >> val;
1942
0
                        if (!iss.fail())
1943
0
                        {
1944
0
                            visitor.int64_value(val, semantic_tag::none, *this, ec);
1945
0
                            more_ = !cursor_mode_;
1946
0
                        }
1947
0
                        else
1948
0
                        {
1949
0
                            if (column_index_ - offset_ < column_defaults_.size() && column_defaults_[column_index_ - offset_].length() > 0)
1950
0
                            {
1951
0
                                basic_json_parser<CharT,temp_allocator_type> parser(alloc_);
1952
0
                                parser.update(column_defaults_[column_index_ - offset_].data(),column_defaults_[column_index_ - offset_].length());
1953
0
                                parser.parse_some(visitor);
1954
0
                                parser.finish_parse(visitor);
1955
0
                            }
1956
0
                            else
1957
0
                            {
1958
0
                                visitor.null_value(semantic_tag::none, *this, ec);
1959
0
                                more_ = !cursor_mode_;
1960
0
                            }
1961
0
                        }
1962
0
                    }
1963
0
                    break;
1964
0
                case csv_column_type::float_t:
1965
0
                    {
1966
0
                        if (lossless_number_)
1967
0
                        {
1968
0
                            visitor.string_value(buffer_,semantic_tag::bigdec, *this, ec);
1969
0
                            more_ = !cursor_mode_;
1970
0
                        }
1971
0
                        else
1972
0
                        {
1973
0
                            std::basic_istringstream<CharT, std::char_traits<CharT>, char_allocator_type> iss{ buffer_ };
1974
0
                            double val;
1975
0
                            iss >> val;
1976
0
                            if (!iss.fail())
1977
0
                            {
1978
0
                                visitor.double_value(val, semantic_tag::none, *this, ec);
1979
0
                                more_ = !cursor_mode_;
1980
0
                            }
1981
0
                            else
1982
0
                            {
1983
0
                                if (column_index_ - offset_ < column_defaults_.size() && column_defaults_[column_index_ - offset_].length() > 0)
1984
0
                                {
1985
0
                                    basic_json_parser<CharT,temp_allocator_type> parser(alloc_);
1986
0
                                    parser.update(column_defaults_[column_index_ - offset_].data(),column_defaults_[column_index_ - offset_].length());
1987
0
                                    parser.parse_some(visitor);
1988
0
                                    parser.finish_parse(visitor);
1989
0
                                }
1990
0
                                else
1991
0
                                {
1992
0
                                    visitor.null_value(semantic_tag::none, *this, ec);
1993
0
                                    more_ = !cursor_mode_;
1994
0
                                }
1995
0
                            }
1996
0
                        }
1997
0
                    }
1998
0
                    break;
1999
0
                case csv_column_type::boolean_t:
2000
0
                    {
2001
0
                        if (buffer_.length() == 1 && buffer_[0] == '0')
2002
0
                        {
2003
0
                            visitor.bool_value(false, semantic_tag::none, *this, ec);
2004
0
                            more_ = !cursor_mode_;
2005
0
                        }
2006
0
                        else if (buffer_.length() == 1 && buffer_[0] == '1')
2007
0
                        {
2008
0
                            visitor.bool_value(true, semantic_tag::none, *this, ec);
2009
0
                            more_ = !cursor_mode_;
2010
0
                        }
2011
0
                        else if (buffer_.length() == 5 && ((buffer_[0] == 'f' || buffer_[0] == 'F') && (buffer_[1] == 'a' || buffer_[1] == 'A') && (buffer_[2] == 'l' || buffer_[2] == 'L') && (buffer_[3] == 's' || buffer_[3] == 'S') && (buffer_[4] == 'e' || buffer_[4] == 'E')))
2012
0
                        {
2013
0
                            visitor.bool_value(false, semantic_tag::none, *this, ec);
2014
0
                            more_ = !cursor_mode_;
2015
0
                        }
2016
0
                        else if (buffer_.length() == 4 && ((buffer_[0] == 't' || buffer_[0] == 'T') && (buffer_[1] == 'r' || buffer_[1] == 'R') && (buffer_[2] == 'u' || buffer_[2] == 'U') && (buffer_[3] == 'e' || buffer_[3] == 'E')))
2017
0
                        {
2018
0
                            visitor.bool_value(true, semantic_tag::none, *this, ec);
2019
0
                            more_ = !cursor_mode_;
2020
0
                        }
2021
0
                        else
2022
0
                        {
2023
0
                            if (column_index_ - offset_ < column_defaults_.size() && column_defaults_[column_index_ - offset_].length() > 0)
2024
0
                            {
2025
0
                                basic_json_parser<CharT,temp_allocator_type> parser(alloc_);
2026
0
                                parser.update(column_defaults_[column_index_ - offset_].data(),column_defaults_[column_index_ - offset_].length());
2027
0
                                parser.parse_some(visitor);
2028
0
                                parser.finish_parse(visitor);
2029
0
                            }
2030
0
                            else
2031
0
                            {
2032
0
                                visitor.null_value(semantic_tag::none, *this, ec);
2033
0
                                more_ = !cursor_mode_;
2034
0
                            }
2035
0
                        }
2036
0
                    }
2037
0
                    break;
2038
0
                default:
2039
0
                    if (buffer_.length() > 0)
2040
0
                    {
2041
0
                        visitor.string_value(buffer_, semantic_tag::none, *this, ec);
2042
0
                        more_ = !cursor_mode_;
2043
0
                    }
2044
0
                    else
2045
0
                    {
2046
0
                        if (column_index_ < column_defaults_.size() + offset_ && column_defaults_[column_index_ - offset_].length() > 0)
2047
0
                        {
2048
0
                            basic_json_parser<CharT,temp_allocator_type> parser(alloc_);
2049
0
                            parser.update(column_defaults_[column_index_ - offset_].data(),column_defaults_[column_index_ - offset_].length());
2050
0
                            parser.parse_some(visitor);
2051
0
                            parser.finish_parse(visitor);
2052
0
                        }
2053
0
                        else
2054
0
                        {
2055
0
                            visitor.string_value(string_view_type(), semantic_tag::none, *this, ec);
2056
0
                            more_ = !cursor_mode_;
2057
0
                        }
2058
0
                    }
2059
0
                    break;  
2060
0
            }
2061
0
        }
2062
15.2M
        else
2063
15.2M
        {
2064
15.2M
            if (infer_types)
2065
15.2M
            {
2066
15.2M
                end_value_with_numeric_check(visitor, ec);
2067
15.2M
            }
2068
1.17k
            else
2069
1.17k
            {
2070
1.17k
                visitor.string_value(buffer_, semantic_tag::none, *this, ec);
2071
1.17k
                more_ = !cursor_mode_;
2072
1.17k
            }
2073
15.2M
        }
2074
15.2M
    }
2075
2076
    // States of the small scanner in end_value_with_numeric_check, which
    // walks the characters of buffer_ to decide how an untyped field should
    // be reported. The scan begins in `initial` and stops early as soon as
    // the state becomes `not_a_number`.
    // NOTE(review): only the handling of the first character is visible in
    // this part of the file; the meaning of the later numeric states is
    // inferred from their names and should be confirmed against the scanner.
    enum class numeric_check_state 
2077
    {
2078
        initial, // nothing consumed yet; dispatches on the first character
2079
        null, // whole field matched the four-letter word "null"
2080
        boolean_true, // whole field matched the scanner's "true" pattern
2081
        boolean_false, // whole field matched the five-letter word "false"
2082
        minus, // a leading '-' was consumed
2083
        zero, // presumably: a leading '0' digit was consumed
2084
        integer, // presumably: inside the integer digits
2085
        fraction1, // presumably: just after the decimal point
2086
        fraction, // presumably: inside the fraction digits
2087
        exp1, // presumably: just after the exponent marker
2088
        exp, // presumably: inside the exponent digits
2089
        not_a_number // field is not a recognised literal; ends the scan
2090
    };
2091
2092
    /*
2093
        xxx_value 
2094
    */
2095
    void end_value_with_numeric_check(basic_json_visitor<CharT>& visitor, 
2096
        std::error_code& ec)
2097
15.2M
    {
2098
15.2M
        numeric_check_state state = numeric_check_state::initial;
2099
15.2M
        bool is_negative = false;
2100
        //int precision = 0;
2101
        //uint8_t decimal_places = 0;
2102
2103
15.2M
        auto last = buffer_.end();
2104
2105
15.2M
        std::string buffer;
2106
31.3M
        for (auto p = buffer_.begin(); state != numeric_check_state::not_a_number && p != last; ++p)
2107
16.1M
        {
2108
16.1M
            switch (state)
2109
16.1M
            {
2110
2.29M
                case numeric_check_state::initial:
2111
2.29M
                {
2112
2.29M
                    switch (*p)
2113
2.29M
                    {
2114
64.1k
                    case 'n':case 'N':
2115
64.1k
                        if ((last-p) == 4 && (p[1] == 'u' || p[1] == 'U') && (p[2] == 'l' || p[2] == 'L') && (p[3] == 'l' || p[3] == 'L'))
2116
37.2k
                        {
2117
37.2k
                            state = numeric_check_state::null;
2118
37.2k
                        }
2119
26.8k
                        else
2120
26.8k
                        {
2121
26.8k
                            state = numeric_check_state::not_a_number;
2122
26.8k
                        }
2123
64.1k
                        break;
2124
123k
                    case 't':case 'T':
2125
123k
                        if ((last-p) == 4 && (p[1] == 'r' || p[1] == 'R') && (p[2] == 'u' || p[2] == 'U') && (p[3] == 'e' || p[3] == 'U'))
2126
110k
                        {
2127
110k
                            state = numeric_check_state::boolean_true;
2128
110k
                        }
2129
13.0k
                        else
2130
13.0k
                        {
2131
13.0k
                            state = numeric_check_state::not_a_number;
2132
13.0k
                        }
2133
123k
                        break;
2134
3.89k
                    case 'f':case 'F':
2135
3.89k
                        if ((last-p) == 5 && (p[1] == 'a' || p[1] == 'A') && (p[2] == 'l' || p[2] == 'L') && (p[3] == 's' || p[3] == 'S') && (p[4] == 'e' || p[4] == 'E'))
2136
967
                        {
2137
967
                            state = numeric_check_state::boolean_false;
2138
967
                        }
2139
2.93k
                        else
2140
2.93k
                        {
2141
2.93k
                            state = numeric_check_state::not_a_number;
2142
2.93k
                        }
2143
3.89k
                        break;
2144
415k
                    case '-':
2145
415k
                        is_negative = true;
2146
415k
                        buffer.push_back(*p);
2147
415k
                        state = numeric_check_state::minus;
2148
415k
                        break;
2149
133k
                    case '0':
2150
                        //++precision;
2151
133k
                        buffer.push_back(*p);
2152
133k
                        state = numeric_check_state::zero;
2153
133k
                        break;
2154
167k
                    case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9':
2155
                        //++precision;
2156
167k
                        buffer.push_back(*p);
2157
167k
                        state = numeric_check_state::integer;
2158
167k
                        break;
2159
1.38M
                    default:
2160
1.38M
                        state = numeric_check_state::not_a_number;
2161
1.38M
                        break;
2162
2.29M
                    }
2163
2.29M
                    break;
2164
2.29M
                }
2165
2.29M
                case numeric_check_state::zero:
2166
97.3k
                {
2167
97.3k
                    switch (*p)
2168
97.3k
                    {
2169
77.8k
                        case '.':
2170
77.8k
                        {
2171
77.8k
                            buffer.push_back('.');
2172
77.8k
                            state = numeric_check_state::fraction1;
2173
77.8k
                        }
2174
77.8k
                        break;
2175
8.19k
                    case 'e':case 'E':
2176
8.19k
                        buffer.push_back(*p);
2177
8.19k
                        state = numeric_check_state::exp1;
2178
8.19k
                        break;
2179
11.3k
                    default:
2180
11.3k
                        state = numeric_check_state::not_a_number;
2181
11.3k
                        break;
2182
97.3k
                    }
2183
97.3k
                    break;
2184
97.3k
                }
2185
12.7M
                case numeric_check_state::integer:
2186
12.7M
                {
2187
12.7M
                    switch (*p)
2188
12.7M
                    {
2189
12.6M
                    case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9':
2190
                        //++precision;
2191
12.6M
                        buffer.push_back(*p);
2192
12.6M
                        break;
2193
18.4k
                    case '.':
2194
18.4k
                        buffer.push_back('.');
2195
18.4k
                        state = numeric_check_state::fraction1;
2196
18.4k
                        break;
2197
9.93k
                    case 'e':case 'E':
2198
9.93k
                        buffer.push_back(*p);
2199
9.93k
                        state = numeric_check_state::exp1;
2200
9.93k
                        break;
2201
11.0k
                    default:
2202
11.0k
                        state = numeric_check_state::not_a_number;
2203
11.0k
                        break;
2204
12.7M
                    }
2205
12.7M
                    break;
2206
12.7M
                }
2207
12.7M
                case numeric_check_state::minus:
2208
397k
                {
2209
397k
                    switch (*p)
2210
397k
                    {
2211
247k
                    case '0':
2212
                        //++precision;
2213
247k
                        buffer.push_back(*p);
2214
247k
                        state = numeric_check_state::zero;
2215
247k
                        break;
2216
142k
                    case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9':
2217
                        //++precision;
2218
142k
                        buffer.push_back(*p);
2219
142k
                        state = numeric_check_state::integer;
2220
142k
                        break;
2221
6.27k
                    default:
2222
6.27k
                        state = numeric_check_state::not_a_number;
2223
6.27k
                        break;
2224
397k
                    }
2225
397k
                    break;
2226
397k
                }
2227
397k
                case numeric_check_state::fraction1:
2228
92.6k
                {
2229
92.6k
                    switch (*p)
2230
92.6k
                    {
2231
88.6k
                    case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9':
2232
                        //++precision;
2233
                        //++decimal_places;
2234
88.6k
                        buffer.push_back(*p);
2235
88.6k
                        state = numeric_check_state::fraction;
2236
88.6k
                        break;
2237
3.96k
                    default:
2238
3.96k
                        state = numeric_check_state::not_a_number;
2239
3.96k
                        break;
2240
92.6k
                    }
2241
92.6k
                    break;
2242
92.6k
                }
2243
92.6k
                case numeric_check_state::fraction:
2244
14.3k
                {
2245
14.3k
                    switch (*p)
2246
14.3k
                    {
2247
12.4k
                    case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9':
2248
                        //++precision;
2249
                        //++decimal_places;
2250
12.4k
                        buffer.push_back(*p);
2251
12.4k
                        break;
2252
927
                    case 'e':case 'E':
2253
927
                        buffer.push_back(*p);
2254
927
                        state = numeric_check_state::exp1;
2255
927
                        break;
2256
933
                    default:
2257
933
                        state = numeric_check_state::not_a_number;
2258
933
                        break;
2259
14.3k
                    }
2260
14.3k
                    break;
2261
14.3k
                }
2262
15.6k
                case numeric_check_state::exp1:
2263
15.6k
                {
2264
15.6k
                    switch (*p)
2265
15.6k
                    {
2266
1.34k
                    case '-':
2267
1.34k
                        buffer.push_back(*p);
2268
1.34k
                        break;
2269
1.52k
                    case '+':
2270
1.52k
                        break;
2271
9.35k
                    case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9':
2272
9.35k
                        state = numeric_check_state::exp;
2273
9.35k
                        buffer.push_back(*p);
2274
9.35k
                        break;
2275
3.44k
                    default:
2276
3.44k
                        state = numeric_check_state::not_a_number;
2277
3.44k
                        break;
2278
15.6k
                    }
2279
15.6k
                    break;
2280
15.6k
                }
2281
29.4k
                case numeric_check_state::exp:
2282
29.4k
                {
2283
29.4k
                    switch (*p)
2284
29.4k
                    {
2285
24.9k
                    case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9':
2286
24.9k
                        buffer.push_back(*p);
2287
24.9k
                        break;
2288
4.48k
                    default:
2289
4.48k
                        state = numeric_check_state::not_a_number;
2290
4.48k
                        break;
2291
29.4k
                    }
2292
29.4k
                    break;
2293
29.4k
                }
2294
447k
                default:
2295
447k
                    break;
2296
16.1M
            }
2297
16.1M
        }
2298
2299
15.2M
        switch (state)
2300
15.2M
        {
2301
37.2k
            case numeric_check_state::null:
2302
37.2k
                visitor.null_value(semantic_tag::none, *this, ec);
2303
37.2k
                more_ = !cursor_mode_;
2304
37.2k
                break;
2305
110k
            case numeric_check_state::boolean_true:
2306
110k
                visitor.bool_value(true, semantic_tag::none, *this, ec);
2307
110k
                more_ = !cursor_mode_;
2308
110k
                break;
2309
967
            case numeric_check_state::boolean_false:
2310
967
                visitor.bool_value(false, semantic_tag::none, *this, ec);
2311
967
                more_ = !cursor_mode_;
2312
967
                break;
2313
284k
            case numeric_check_state::zero:
2314
554k
            case numeric_check_state::integer:
2315
554k
            {
2316
554k
                if (is_negative)
2317
378k
                {
2318
378k
                    int64_t val{ 0 };
2319
378k
                    auto result = jsoncons::utility::dec_to_integer(buffer_.data(), buffer_.length(), val);
2320
378k
                    if (result)
2321
375k
                    {
2322
375k
                        visitor.int64_value(val, semantic_tag::none, *this, ec);
2323
375k
                        more_ = !cursor_mode_;
2324
375k
                    }
2325
3.59k
                    else // Must be overflow
2326
3.59k
                    {
2327
3.59k
                        visitor.string_value(buffer_, semantic_tag::bigint, *this, ec);
2328
3.59k
                        more_ = !cursor_mode_;
2329
3.59k
                    }
2330
378k
                }
2331
176k
                else
2332
176k
                {
2333
176k
                    uint64_t val{ 0 };
2334
176k
                    auto result = jsoncons::utility::dec_to_integer(buffer_.data(), buffer_.length(), val);
2335
176k
                    if (result)
2336
173k
                    {
2337
173k
                        visitor.uint64_value(val, semantic_tag::none, *this, ec);
2338
173k
                        more_ = !cursor_mode_;
2339
173k
                    }
2340
2.31k
                    else if (result.ec == std::errc::result_out_of_range)
2341
2.31k
                    {
2342
2.31k
                        visitor.string_value(buffer_, semantic_tag::bigint, *this, ec);
2343
2.31k
                        more_ = !cursor_mode_;
2344
2.31k
                    }
2345
0
                    else
2346
0
                    {
2347
0
                        ec = csv_errc::invalid_number; 
2348
0
                        more_ = false;
2349
0
                        return;
2350
0
                    }
2351
176k
                }
2352
554k
                break;
2353
554k
            }
2354
554k
            case numeric_check_state::fraction:
2355
91.6k
            case numeric_check_state::exp:
2356
91.6k
            {
2357
91.6k
                if (lossless_number_)
2358
0
                {
2359
0
                    visitor.string_value(buffer_,semantic_tag::bigdec, *this, ec);
2360
0
                    more_ = !cursor_mode_;
2361
0
                }
2362
91.6k
                else
2363
91.6k
                {
2364
91.6k
                    double d{0};
2365
91.6k
                    auto result = jsoncons::utility::decstr_to_double(buffer.c_str(), buffer.length(), d);
2366
91.6k
                    if (result.ec == std::errc::result_out_of_range)
2367
651
                    {
2368
651
                        d = buffer.front() == '-' ? -HUGE_VAL : HUGE_VAL;
2369
651
                    }
2370
91.0k
                    else if (result.ec == std::errc::invalid_argument)
2371
6
                    {
2372
6
                        ec = csv_errc::invalid_number; 
2373
6
                        more_ = false;
2374
6
                        return;
2375
6
                    }
2376
91.6k
                    visitor.double_value(d, semantic_tag::none, *this, ec);
2377
91.6k
                    more_ = !cursor_mode_;
2378
91.6k
                }
2379
91.6k
                break;
2380
91.6k
            }
2381
14.4M
            default:
2382
14.4M
            {
2383
14.4M
                visitor.string_value(buffer_, semantic_tag::none, *this, ec);
2384
14.4M
                more_ = !cursor_mode_;
2385
14.4M
                break;
2386
91.6k
            }
2387
15.2M
        }
2388
15.2M
    } 
2389
2390
    // Saves `state` on top of the parser's state stack so that a later
    // pop_state() can hand it back.
    void push_state(csv_parse_state state)
    {
        state_stack_.emplace_back(state);
    }
2394
2395
    // Removes the most recently pushed state from the state stack and
    // returns it. The stack must not be empty (checked with JSONCONS_ASSERT).
    csv_parse_state pop_state()
    {
        JSONCONS_ASSERT(!state_stack_.empty())
        const csv_parse_state top = state_stack_.back();
        state_stack_.pop_back();
        return top;
    }
2402
};
2403
2404
// Convenience alias: basic_csv_parser instantiated with char.
using csv_parser = basic_csv_parser<char>;
2405
// Convenience alias: basic_csv_parser instantiated with wchar_t.
using wcsv_parser = basic_csv_parser<wchar_t>;
2406
2407
}}
2408
2409
#endif
2410