/src/trafficserver/lib/yamlcpp/src/scanscalar.cpp
Line | Count | Source |
1 | | #include "scanscalar.h" |
2 | | |
3 | | #include <algorithm> |
4 | | |
5 | | #include "exp.h" |
6 | | #include "regeximpl.h" |
7 | | #include "stream.h" |
8 | | #include "yaml-cpp/exceptions.h" // IWYU pragma: keep |
9 | | |
10 | | namespace YAML { |
11 | | // ScanScalar |
12 | | // . This is where the scalar magic happens. |
13 | | // |
14 | | // . We do the scanning in three phases: |
15 | | // 1. Scan until newline |
16 | | // 2. Eat newline |
17 | | // 3. Scan leading blanks. |
18 | | // |
19 | | // . Depending on the parameters given, we store or stop |
20 | | // and different places in the above flow. |
21 | 42.3k | std::string ScanScalar(Stream& INPUT, ScanScalarParams& params) { |
22 | 42.3k | bool foundNonEmptyLine = false; |
23 | 42.3k | bool pastOpeningBreak = (params.fold == FOLD_FLOW); |
24 | 42.3k | bool emptyLine = false, moreIndented = false; |
25 | 42.3k | int foldedNewlineCount = 0; |
26 | 42.3k | bool foldedNewlineStartedMoreIndented = false; |
27 | 42.3k | std::size_t lastEscapedChar = std::string::npos; |
28 | 42.3k | std::string scalar; |
29 | 42.3k | params.leadingSpaces = false; |
30 | | |
31 | 42.3k | if (!params.end) { |
32 | 6.59k | params.end = &Exp::Empty(); |
33 | 6.59k | } |
34 | | |
35 | 107k | while (INPUT) { |
36 | | // ******************************** |
37 | | // Phase #1: scan until line ending |
38 | | |
39 | 105k | std::size_t lastNonWhitespaceChar = scalar.size(); |
40 | 105k | bool escapedNewline = false; |
41 | 396k | while (!params.end->Matches(INPUT) && !Exp::Break().Matches(INPUT)) { |
42 | 295k | if (!INPUT) { |
43 | 2.57k | break; |
44 | 2.57k | } |
45 | | |
46 | | // document indicator? |
47 | 293k | if (INPUT.column() == 0 && Exp::DocIndicator().Matches(INPUT)) { |
48 | 396 | if (params.onDocIndicator == BREAK) { |
49 | 394 | break; |
50 | 394 | } |
51 | 2 | if (params.onDocIndicator == THROW) { |
52 | 2 | throw ParserException(INPUT.mark(), ErrorMsg::DOC_IN_SCALAR); |
53 | 2 | } |
54 | 2 | } |
55 | | |
56 | 292k | foundNonEmptyLine = true; |
57 | 292k | pastOpeningBreak = true; |
58 | | |
59 | | // escaped newline? (only if we're escaping on slash) |
60 | 292k | if (params.escape == '\\' && Exp::EscBreak().Matches(INPUT)) { |
61 | | // eat escape character and get out (but preserve trailing whitespace!) |
62 | 1.32k | INPUT.get(); |
63 | 1.32k | lastNonWhitespaceChar = scalar.size(); |
64 | 1.32k | lastEscapedChar = scalar.size(); |
65 | 1.32k | escapedNewline = true; |
66 | 1.32k | break; |
67 | 1.32k | } |
68 | | |
69 | | // escape this? |
70 | 291k | if (INPUT.peek() == params.escape) { |
71 | 45.2k | scalar += Exp::Escape(INPUT); |
72 | 45.2k | lastNonWhitespaceChar = scalar.size(); |
73 | 45.2k | lastEscapedChar = scalar.size(); |
74 | 45.2k | continue; |
75 | 45.2k | } |
76 | | |
77 | | // otherwise, just add the damn character |
78 | 246k | char ch = INPUT.get(); |
79 | 246k | scalar += ch; |
80 | 246k | if (ch != ' ' && ch != '\t') { |
81 | 233k | lastNonWhitespaceChar = scalar.size(); |
82 | 233k | } |
83 | 246k | } |
84 | | |
85 | | // eof? if we're looking to eat something, then we throw |
86 | 105k | if (!INPUT) { |
87 | 2.70k | if (params.eatEnd) { |
88 | 197 | throw ParserException(INPUT.mark(), ErrorMsg::EOF_IN_SCALAR); |
89 | 197 | } |
90 | 2.51k | break; |
91 | 2.70k | } |
92 | | |
93 | | // doc indicator? |
94 | 102k | if (params.onDocIndicator == BREAK && INPUT.column() == 0 && |
95 | 5.27k | Exp::DocIndicator().Matches(INPUT)) { |
96 | 394 | break; |
97 | 394 | } |
98 | | |
99 | | // are we done via character match? |
100 | 102k | int n = params.end->Match(INPUT); |
101 | 102k | if (n >= 0) { |
102 | 29.5k | if (params.eatEnd) { |
103 | 6.27k | INPUT.eat(n); |
104 | 6.27k | } |
105 | 29.5k | break; |
106 | 29.5k | } |
107 | | |
108 | | // do we remove trailing whitespace? |
109 | 72.7k | if (params.fold == FOLD_FLOW) |
110 | 14.7k | scalar.erase(lastNonWhitespaceChar); |
111 | | |
112 | | // ******************************** |
113 | | // Phase #2: eat line ending |
114 | 72.7k | n = Exp::Break().Match(INPUT); |
115 | 72.7k | INPUT.eat(n); |
116 | | |
117 | | // ******************************** |
118 | | // Phase #3: scan initial spaces |
119 | | |
120 | | // first the required indentation |
121 | 73.9k | while (INPUT.peek() == ' ' && |
122 | 4.34k | (INPUT.column() < params.indent || |
123 | 3.94k | (params.detectIndent && !foundNonEmptyLine)) && |
124 | 1.16k | !params.end->Matches(INPUT)) { |
125 | 1.16k | INPUT.eat(1); |
126 | 1.16k | } |
127 | | |
128 | | // update indent if we're auto-detecting |
129 | 72.7k | if (params.detectIndent && !foundNonEmptyLine) { |
130 | 29.2k | params.indent = std::max(params.indent, INPUT.column()); |
131 | 29.2k | } |
132 | | |
133 | | // and then the rest of the whitespace |
134 | 73.9k | while (Exp::Blank().Matches(INPUT)) { |
135 | | // we check for tabs that masquerade as indentation |
136 | 5.83k | if (INPUT.peek() == '\t' && INPUT.column() < params.indent && |
137 | 4 | params.onTabInIndentation == THROW) { |
138 | 4 | throw ParserException(INPUT.mark(), ErrorMsg::TAB_IN_INDENTATION); |
139 | 4 | } |
140 | | |
141 | 5.83k | if (!params.eatLeadingWhitespace) { |
142 | 4.22k | break; |
143 | 4.22k | } |
144 | | |
145 | 1.61k | if (params.end->Matches(INPUT)) { |
146 | 389 | break; |
147 | 389 | } |
148 | | |
149 | 1.22k | INPUT.eat(1); |
150 | 1.22k | } |
151 | | |
152 | | // was this an empty line? |
153 | 72.7k | bool nextEmptyLine = Exp::Break().Matches(INPUT); |
154 | 72.7k | bool nextMoreIndented = Exp::Blank().Matches(INPUT); |
155 | 72.7k | if (params.fold == FOLD_BLOCK && foldedNewlineCount == 0 && nextEmptyLine) |
156 | 6.28k | foldedNewlineStartedMoreIndented = moreIndented; |
157 | | |
158 | | // for block scalars, we always start with a newline, so we should ignore it |
159 | | // (not fold or keep) |
160 | 72.7k | if (pastOpeningBreak) { |
161 | 65.8k | switch (params.fold) { |
162 | 1.88k | case DONT_FOLD: |
163 | 1.88k | scalar += "\n"; |
164 | 1.88k | break; |
165 | 49.2k | case FOLD_BLOCK: |
166 | 49.2k | if (!emptyLine && !nextEmptyLine && !moreIndented && |
167 | 3.59k | !nextMoreIndented && INPUT.column() >= params.indent) { |
168 | 2.29k | scalar += " "; |
169 | 46.9k | } else if (nextEmptyLine) { |
170 | 38.0k | foldedNewlineCount++; |
171 | 38.0k | } else { |
172 | 8.88k | scalar += "\n"; |
173 | 8.88k | } |
174 | | |
175 | 49.2k | if (!nextEmptyLine && foldedNewlineCount > 0) { |
176 | 5.01k | scalar += std::string(foldedNewlineCount - 1, '\n'); |
177 | 5.01k | if (foldedNewlineStartedMoreIndented || |
178 | 3.33k | nextMoreIndented | !foundNonEmptyLine) { |
179 | 3.18k | scalar += "\n"; |
180 | 3.18k | } |
181 | 5.01k | foldedNewlineCount = 0; |
182 | 5.01k | } |
183 | 49.2k | break; |
184 | 14.7k | case FOLD_FLOW: |
185 | 14.7k | if (nextEmptyLine) { |
186 | 8.17k | scalar += "\n"; |
187 | 8.17k | } else if (!emptyLine && !escapedNewline) { |
188 | 4.37k | scalar += " "; |
189 | 4.37k | } |
190 | 14.7k | break; |
191 | 65.8k | } |
192 | 65.8k | } |
193 | | |
194 | 72.7k | emptyLine = nextEmptyLine; |
195 | 72.7k | moreIndented = nextMoreIndented; |
196 | 72.7k | pastOpeningBreak = true; |
197 | | |
198 | | // are we done via indentation? |
199 | 72.7k | if (!emptyLine && INPUT.column() < params.indent) { |
200 | 8.04k | params.leadingSpaces = true; |
201 | 8.04k | break; |
202 | 8.04k | } |
203 | 72.7k | } |
204 | | |
205 | | // post-processing |
206 | 42.1k | if (params.trimTrailingSpaces) { |
207 | 28.6k | std::size_t pos = scalar.find_last_not_of(" \t"); |
208 | 28.6k | if (lastEscapedChar != std::string::npos) { |
209 | 3.54k | if (pos < lastEscapedChar || pos == std::string::npos) { |
210 | 2.49k | pos = lastEscapedChar; |
211 | 2.49k | } |
212 | 3.54k | } |
213 | 28.6k | if (pos < scalar.size()) { |
214 | 27.0k | scalar.erase(pos + 1); |
215 | 27.0k | } |
216 | 28.6k | } |
217 | | |
218 | 42.1k | switch (params.chomp) { |
219 | 11.5k | case CLIP: { |
220 | 11.5k | std::size_t pos = scalar.find_last_not_of('\n'); |
221 | 11.5k | if (lastEscapedChar != std::string::npos) { |
222 | 2.04k | if (pos < lastEscapedChar || pos == std::string::npos) { |
223 | 1.12k | pos = lastEscapedChar; |
224 | 1.12k | } |
225 | 2.04k | } |
226 | 11.5k | if (pos == std::string::npos) { |
227 | 7.17k | scalar.erase(); |
228 | 7.17k | } else if (pos + 1 < scalar.size()) { |
229 | 1.23k | scalar.erase(pos + 2); |
230 | 1.23k | } |
231 | 11.5k | } break; |
232 | 29.4k | case STRIP: { |
233 | 29.4k | std::size_t pos = scalar.find_last_not_of('\n'); |
234 | 29.4k | if (lastEscapedChar != std::string::npos) { |
235 | 3.59k | if (pos < lastEscapedChar || pos == std::string::npos) { |
236 | 1.66k | pos = lastEscapedChar; |
237 | 1.66k | } |
238 | 3.59k | } |
239 | 29.4k | if (pos == std::string::npos) { |
240 | 779 | scalar.erase(); |
241 | 28.6k | } else if (pos < scalar.size()) { |
242 | 27.1k | scalar.erase(pos + 1); |
243 | 27.1k | } |
244 | 29.4k | } break; |
245 | 645 | default: |
246 | 645 | break; |
247 | 42.1k | } |
248 | | |
249 | 41.6k | return scalar; |
250 | 42.1k | } |
251 | | } // namespace YAML |