/proc/self/cwd/pw_tokenizer/csv.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2024 The Pigweed Authors |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not |
4 | | // use this file except in compliance with the License. You may obtain a copy of |
5 | | // the License at |
6 | | // |
7 | | // https://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
11 | | // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
12 | | // License for the specific language governing permissions and limitations under |
13 | | // the License. |
14 | | |
15 | | #include "pw_tokenizer_private/csv.h" |
16 | | |
17 | | #include "pw_log/log.h" |
18 | | |
19 | | namespace pw::tokenizer::internal { |
20 | | namespace { |
21 | | |
// Character that delimits adjacent entries on a single CSV line.
constexpr char kSeparator = ',';

// Reports whether `ch` is a carriage return or a line feed, either of which
// ends the current CSV line.
[[nodiscard]] constexpr bool IsLineEnd(char ch) {
  switch (ch) {
    case '\r':
    case '\n':
      return true;
    default:
      return false;
  }
}
26 | | |
27 | | } // namespace |
28 | | |
29 | | std::optional<std::vector<std::string>> CsvParser::ParseCharacterOrEof( |
30 | 0 | int val) { |
31 | 0 | const char ch = static_cast<char>(val); |
32 | 0 | switch (state_) { |
33 | 0 | case kNewEntry: |
34 | 0 | if (ch == '"') { |
35 | 0 | state_ = kQuotedEntry; |
36 | 0 | } else if (IsLineEnd(ch)) { |
37 | 0 | if (line_.size() > 1) { // Ignore empty lines |
38 | 0 | return FinishLine(); |
39 | 0 | } |
40 | 0 | } else if (ch == kSeparator) { |
41 | 0 | line_.emplace_back(); // Append the empty entry, start the next |
42 | 0 | } else { |
43 | 0 | state_ = kUnquotedEntry; |
44 | 0 | line_.back().push_back(ch); |
45 | 0 | } |
46 | 0 | break; |
47 | 0 | case kUnquotedEntry: |
48 | 0 | if (val == kEndOfFile || IsLineEnd(ch)) { |
49 | 0 | return FinishLine(); |
50 | 0 | } |
51 | 0 | if (ch == kSeparator) { |
52 | 0 | state_ = kNewEntry; |
53 | 0 | line_.emplace_back(); |
54 | 0 | } else { |
55 | 0 | line_.back().push_back(ch); |
56 | 0 | } |
57 | 0 | break; |
58 | 0 | case kQuotedEntry: |
59 | 0 | if (val == kEndOfFile) { |
60 | 0 | PW_LOG_WARN("Unexpected end-of-file in quoted entry; ignoring line"); |
61 | 0 | } else if (ch == '"') { |
62 | 0 | state_ = kQuotedEntryQuote; |
63 | 0 | } else { |
64 | 0 | line_.back().push_back(ch); |
65 | 0 | } |
66 | 0 | break; |
67 | 0 | case kQuotedEntryQuote: |
68 | 0 | if (ch == '"') { |
69 | 0 | state_ = kQuotedEntry; |
70 | 0 | line_.back().push_back('"'); |
71 | 0 | } else if (val == kEndOfFile || IsLineEnd(ch)) { |
72 | 0 | return FinishLine(); |
73 | 0 | } else if (ch == kSeparator) { |
74 | 0 | state_ = kNewEntry; |
75 | 0 | line_.emplace_back(); |
76 | 0 | } else { |
77 | 0 | PW_LOG_WARN( |
78 | 0 | "Unexpected character '%c' after quoted entry; expected ',' or a " |
79 | 0 | "line ending; skipping line", |
80 | 0 | ch); |
81 | 0 | state_ = kError; |
82 | 0 | line_.clear(); |
83 | 0 | line_.emplace_back(); |
84 | 0 | } |
85 | 0 | break; |
86 | 0 | case kError: |
87 | 0 | if (IsLineEnd(ch)) { // Skip chars until end-of-line |
88 | 0 | state_ = kNewEntry; |
89 | 0 | } |
90 | 0 | break; |
91 | 0 | } |
92 | 0 | return std::nullopt; |
93 | 0 | } |
94 | | |
95 | 0 | std::optional<std::vector<std::string>> CsvParser::FinishLine() { |
96 | 0 | state_ = kNewEntry; |
97 | 0 | std::optional<std::vector<std::string>> completed_line = std::move(line_); |
98 | 0 | line_.clear(); |
99 | 0 | line_.emplace_back(); |
100 | 0 | return completed_line; |
101 | 0 | } |
102 | | |
103 | | } // namespace pw::tokenizer::internal |