/src/yaml-cpp/src/scanscalar.cpp
Line | Count | Source |
1 | | #include "scanscalar.h" |
2 | | |
3 | | #include <algorithm> |
4 | | |
5 | | #include "exp.h" |
6 | | #include "regeximpl.h" |
7 | | #include "stream.h" |
8 | | #include "yaml-cpp/exceptions.h" // IWYU pragma: keep |
9 | | |
10 | | namespace YAML { |
11 | | // ScanScalar |
12 | | // . This is where the scalar magic happens. |
13 | | // |
14 | | // . We do the scanning in three phases: |
15 | | // 1. Scan until newline |
16 | | // 2. Eat newline |
17 | | // 3. Scan leading blanks. |
18 | | // |
19 | | // . Depending on the parameters given, we store or stop |
20 | | // and different places in the above flow. |
21 | 1.37M | std::string ScanScalar(Stream& INPUT, ScanScalarParams& params) { |
22 | 1.37M | bool foundNonEmptyLine = false; |
23 | 1.37M | bool pastOpeningBreak = (params.fold == FOLD_FLOW); |
24 | 1.37M | bool emptyLine = false, moreIndented = false; |
25 | 1.37M | int foldedNewlineCount = 0; |
26 | 1.37M | bool foldedNewlineStartedMoreIndented = false; |
27 | 1.37M | std::size_t lastEscapedChar = std::string::npos; |
28 | 1.37M | std::string scalar; |
29 | 1.37M | params.leadingSpaces = false; |
30 | | |
31 | 1.37M | if (!params.end) { |
32 | 1.88k | params.end = &Exp::Empty(); |
33 | 1.88k | } |
34 | | |
35 | 7.67M | while (INPUT) { |
36 | | // ******************************** |
37 | | // Phase #1: scan until line ending |
38 | | |
39 | 7.67M | std::size_t lastNonWhitespaceChar = scalar.size(); |
40 | 7.67M | bool escapedNewline = false; |
41 | 68.3M | while (!params.end->Matches(INPUT) && !Exp::Break().Matches(INPUT)) { |
42 | 60.6M | if (!INPUT) { |
43 | 1.58k | break; |
44 | 1.58k | } |
45 | | |
46 | | // document indicator? |
47 | 60.6M | if (INPUT.column() == 0 && Exp::DocIndicator().Matches(INPUT)) { |
48 | 41.6k | if (params.onDocIndicator == BREAK) { |
49 | 41.6k | break; |
50 | 41.6k | } |
51 | 1 | if (params.onDocIndicator == THROW) { |
52 | 1 | throw ParserException(INPUT.mark(), ErrorMsg::DOC_IN_SCALAR); |
53 | 1 | } |
54 | 1 | } |
55 | | |
56 | 60.6M | foundNonEmptyLine = true; |
57 | 60.6M | pastOpeningBreak = true; |
58 | | |
59 | | // escaped newline? (only if we're escaping on slash) |
60 | 60.6M | if (params.escape == '\\' && Exp::EscBreak().Matches(INPUT)) { |
61 | | // eat escape character and get out (but preserve trailing whitespace!) |
62 | 619 | INPUT.get(); |
63 | 619 | lastNonWhitespaceChar = scalar.size(); |
64 | 619 | lastEscapedChar = scalar.size(); |
65 | 619 | escapedNewline = true; |
66 | 619 | break; |
67 | 619 | } |
68 | | |
69 | | // escape this? |
70 | 60.6M | if (INPUT.peek() == params.escape) { |
71 | 38.3k | scalar += Exp::Escape(INPUT); |
72 | 38.3k | lastNonWhitespaceChar = scalar.size(); |
73 | 38.3k | lastEscapedChar = scalar.size(); |
74 | 38.3k | continue; |
75 | 38.3k | } |
76 | | |
77 | | // otherwise, just add the damn character |
78 | 60.6M | char ch = INPUT.get(); |
79 | 60.6M | scalar += ch; |
80 | 60.6M | if (ch != ' ' && ch != '\t') { |
81 | 58.4M | lastNonWhitespaceChar = scalar.size(); |
82 | 58.4M | } |
83 | 60.6M | } |
84 | | |
85 | | // eof? if we're looking to eat something, then we throw |
86 | 7.67M | if (!INPUT) { |
87 | 1.65k | if (params.eatEnd) { |
88 | 199 | throw ParserException(INPUT.mark(), ErrorMsg::EOF_IN_SCALAR); |
89 | 199 | } |
90 | 1.45k | break; |
91 | 1.65k | } |
92 | | |
93 | | // doc indicator? |
94 | 7.67M | if (params.onDocIndicator == BREAK && INPUT.column() == 0 && |
95 | 3.85M | Exp::DocIndicator().Matches(INPUT)) { |
96 | 41.6k | break; |
97 | 41.6k | } |
98 | | |
99 | | // are we done via character match? |
100 | 7.63M | int n = params.end->Match(INPUT); |
101 | 7.63M | if (n >= 0) { |
102 | 428k | if (params.eatEnd) { |
103 | 5.94k | INPUT.eat(n); |
104 | 5.94k | } |
105 | 428k | break; |
106 | 428k | } |
107 | | |
108 | | // do we remove trailing whitespace? |
109 | 7.20M | if (params.fold == FOLD_FLOW) |
110 | 5.46M | scalar.erase(lastNonWhitespaceChar); |
111 | | |
112 | | // ******************************** |
113 | | // Phase #2: eat line ending |
114 | 7.20M | n = Exp::Break().Match(INPUT); |
115 | 7.20M | INPUT.eat(n); |
116 | | |
117 | | // ******************************** |
118 | | // Phase #3: scan initial spaces |
119 | | |
120 | | // first the required indentation |
121 | 7.20M | while (INPUT.peek() == ' ' && |
122 | 3.80k | (INPUT.column() < params.indent || |
123 | 3.37k | (params.detectIndent && !foundNonEmptyLine)) && |
124 | 832 | !params.end->Matches(INPUT)) { |
125 | 633 | INPUT.eat(1); |
126 | 633 | } |
127 | | |
128 | | // update indent if we're auto-detecting |
129 | 7.20M | if (params.detectIndent && !foundNonEmptyLine) { |
130 | 1.44M | params.indent = std::max(params.indent, INPUT.column()); |
131 | 1.44M | } |
132 | | |
133 | | // and then the rest of the whitespace |
134 | 7.20M | while (Exp::Blank().Matches(INPUT)) { |
135 | | // we check for tabs that masquerade as indentation |
136 | 7.29k | if (INPUT.peek() == '\t' && INPUT.column() < params.indent && |
137 | 1 | params.onTabInIndentation == THROW) { |
138 | 1 | throw ParserException(INPUT.mark(), ErrorMsg::TAB_IN_INDENTATION); |
139 | 1 | } |
140 | | |
141 | 7.29k | if (!params.eatLeadingWhitespace) { |
142 | 2.23k | break; |
143 | 2.23k | } |
144 | | |
145 | 5.05k | if (params.end->Matches(INPUT)) { |
146 | 259 | break; |
147 | 259 | } |
148 | | |
149 | 4.79k | INPUT.eat(1); |
150 | 4.79k | } |
151 | | |
152 | | // was this an empty line? |
153 | 7.20M | bool nextEmptyLine = Exp::Break().Matches(INPUT); |
154 | 7.20M | bool nextMoreIndented = Exp::Blank().Matches(INPUT); |
155 | 7.20M | if (params.fold == FOLD_BLOCK && foldedNewlineCount == 0 && nextEmptyLine) |
156 | 2.97k | foldedNewlineStartedMoreIndented = moreIndented; |
157 | | |
158 | | // for block scalars, we always start with a newline, so we should ignore it |
159 | | // (not fold or keep) |
160 | 7.20M | if (pastOpeningBreak) { |
161 | 7.20M | switch (params.fold) { |
162 | 1.00k | case DONT_FOLD: |
163 | 1.00k | scalar += "\n"; |
164 | 1.00k | break; |
165 | 1.73M | case FOLD_BLOCK: |
166 | 1.73M | if (!emptyLine && !nextEmptyLine && !moreIndented && |
167 | 22.2k | !nextMoreIndented && INPUT.column() >= params.indent) { |
168 | 21.9k | scalar += " "; |
169 | 1.71M | } else if (nextEmptyLine) { |
170 | 1.70M | foldedNewlineCount++; |
171 | 1.70M | } else { |
172 | 4.13k | scalar += "\n"; |
173 | 4.13k | } |
174 | | |
175 | 1.73M | if (!nextEmptyLine && foldedNewlineCount > 0) { |
176 | 2.76k | scalar += std::string(foldedNewlineCount - 1, '\n'); |
177 | 2.76k | if (foldedNewlineStartedMoreIndented || |
178 | 1.53k | nextMoreIndented | !foundNonEmptyLine) { |
179 | 1.50k | scalar += "\n"; |
180 | 1.50k | } |
181 | 2.76k | foldedNewlineCount = 0; |
182 | 2.76k | } |
183 | 1.73M | break; |
184 | 5.46M | case FOLD_FLOW: |
185 | 5.46M | if (nextEmptyLine) { |
186 | 4.44M | scalar += "\n"; |
187 | 4.44M | } else if (!emptyLine && !escapedNewline) { |
188 | 1.01M | scalar += " "; |
189 | 1.01M | } |
190 | 5.46M | break; |
191 | 7.20M | } |
192 | 7.20M | } |
193 | | |
194 | 7.20M | emptyLine = nextEmptyLine; |
195 | 7.20M | moreIndented = nextMoreIndented; |
196 | 7.20M | pastOpeningBreak = true; |
197 | | |
198 | | // are we done via indentation? |
199 | 7.20M | if (!emptyLine && INPUT.column() < params.indent) { |
200 | 900k | params.leadingSpaces = true; |
201 | 900k | break; |
202 | 900k | } |
203 | 7.20M | } |
204 | | |
205 | | // post-processing |
206 | 1.37M | if (params.trimTrailingSpaces) { |
207 | 1.36M | std::size_t pos = scalar.find_last_not_of(" \t"); |
208 | 1.36M | if (lastEscapedChar != std::string::npos) { |
209 | 13.7k | if (pos < lastEscapedChar || pos == std::string::npos) { |
210 | 9.07k | pos = lastEscapedChar; |
211 | 9.07k | } |
212 | 13.7k | } |
213 | 1.36M | if (pos < scalar.size()) { |
214 | 1.35M | scalar.erase(pos + 1); |
215 | 1.35M | } |
216 | 1.36M | } |
217 | | |
218 | 1.37M | switch (params.chomp) { |
219 | 7.89k | case CLIP: { |
220 | 7.89k | std::size_t pos = scalar.find_last_not_of('\n'); |
221 | 7.89k | if (lastEscapedChar != std::string::npos) { |
222 | 2.30k | if (pos < lastEscapedChar || pos == std::string::npos) { |
223 | 1.75k | pos = lastEscapedChar; |
224 | 1.75k | } |
225 | 2.30k | } |
226 | 7.89k | if (pos == std::string::npos) { |
227 | 3.44k | scalar.erase(); |
228 | 4.44k | } else if (pos + 1 < scalar.size()) { |
229 | 191 | scalar.erase(pos + 2); |
230 | 191 | } |
231 | 7.89k | } break; |
232 | 1.36M | case STRIP: { |
233 | 1.36M | std::size_t pos = scalar.find_last_not_of('\n'); |
234 | 1.36M | if (lastEscapedChar != std::string::npos) { |
235 | 13.7k | if (pos < lastEscapedChar || pos == std::string::npos) { |
236 | 8.35k | pos = lastEscapedChar; |
237 | 8.35k | } |
238 | 13.7k | } |
239 | 1.36M | if (pos == std::string::npos) { |
240 | 66 | scalar.erase(); |
241 | 1.36M | } else if (pos < scalar.size()) { |
242 | 1.35M | scalar.erase(pos + 1); |
243 | 1.35M | } |
244 | 1.36M | } break; |
245 | 22 | default: |
246 | 22 | break; |
247 | 1.37M | } |
248 | | |
249 | 1.37M | return scalar; |
250 | 1.37M | } |
251 | | } // namespace YAML |