/src/logging-log4cxx/src/main/cpp/patternparser.cpp
Line | Count | Source |
1 | | /* |
2 | | * Licensed to the Apache Software Foundation (ASF) under one or more |
3 | | * contributor license agreements. See the NOTICE file distributed with |
4 | | * this work for additional information regarding copyright ownership. |
5 | | * The ASF licenses this file to You under the Apache License, Version 2.0 |
6 | | * (the "License"); you may not use this file except in compliance with |
7 | | * the License. You may obtain a copy of the License at |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | */ |
17 | | |
18 | | #include <log4cxx/logstring.h> |
19 | | #include <log4cxx/pattern/patternparser.h> |
20 | | #include <log4cxx/pattern/literalpatternconverter.h> |
21 | | #include <log4cxx/helpers/loglog.h> |
22 | | |
23 | | using namespace LOG4CXX_NS; |
24 | | using namespace LOG4CXX_NS::pattern; |
25 | | using namespace LOG4CXX_NS::helpers; |
26 | | |
27 | | const logchar PatternParser::ESCAPE_CHAR = 0x25; // '%' |
28 | | |
29 | | |
30 | | /** |
31 | | * Private constructor. |
32 | | */ |
33 | | PatternParser::PatternParser() |
34 | 0 | { |
35 | 0 | } |
36 | | |
37 | | bool PatternParser::isUnicodeIdentifierStart(logchar ch) |
38 | 679k | { |
39 | | // |
40 | | // greatly simplified version checks if |
41 | | // character is USACII alpha or number |
42 | | // |
43 | 679k | return (ch >= 0x41 /* 'A' */ && ch <= 0x5A /* 'Z' */) || |
44 | 609k | (ch >= 0x61 /* 'a' */ && ch <= 0x7A /* 'z' */) || |
45 | 173k | (ch >= 0x30 /* '0' */ && ch <= 0x39 /* '9' */); |
46 | 679k | } |
47 | | |
48 | | bool PatternParser::isUnicodeIdentifierPart(logchar ch) |
49 | 557k | { |
50 | | // |
51 | | // greatly simplified version checks if |
52 | | // character is USACII alpha or number |
53 | | // |
54 | 557k | return isUnicodeIdentifierStart(ch) |
55 | 115k | || (ch == 0x5F /* '_' */); |
56 | 557k | } |
57 | | |
58 | | size_t PatternParser::extractConverter( |
59 | | logchar lastChar, const LogString& pattern, |
60 | | LogString::size_type i, LogString& convBuf, |
61 | | LogString& currentLiteral) |
62 | 122k | { |
63 | 122k | if (!convBuf.empty()) |
64 | 0 | { |
65 | 0 | convBuf.erase(convBuf.begin(), convBuf.end()); |
66 | 0 | } |
67 | | |
68 | | // When this method is called, lastChar points to the first character of the |
69 | | // conversion word. For example: |
70 | | // For "%hello" lastChar = 'h' |
71 | | // For "%-5hello" lastChar = 'h' |
72 | | //System.out.println("lastchar is "+lastChar); |
73 | 122k | if (!isUnicodeIdentifierStart(lastChar)) |
74 | 6.40k | { |
75 | 6.40k | return i; |
76 | 6.40k | } |
77 | | |
78 | 115k | convBuf.append(1, lastChar); |
79 | | |
80 | 115k | while ( |
81 | 558k | (i < pattern.length()) |
82 | 557k | && isUnicodeIdentifierPart(pattern[i])) |
83 | 442k | { |
84 | 442k | convBuf.append(1, pattern[i]); |
85 | 442k | currentLiteral.append(1, pattern[i]); |
86 | | |
87 | | //System.out.println("conv buffer is now ["+convBuf+"]."); |
88 | 442k | i++; |
89 | 442k | } |
90 | | |
91 | 115k | return i; |
92 | 122k | } |
93 | | |
94 | | |
95 | | size_t PatternParser::extractOptions(const LogString& pattern, LogString::size_type i, |
96 | | std::vector<LogString>& options) |
97 | 115k | { |
98 | 180k | while ((i < pattern.length()) && (pattern[i] == 0x7B /* '{' */)) |
99 | 65.3k | { |
100 | 65.3k | size_t end = pattern.find(0x7D /* '}' */, i); |
101 | | |
102 | 65.3k | if (end == pattern.npos) |
103 | 515 | { |
104 | 515 | break; |
105 | 515 | } |
106 | | |
107 | 64.8k | LogString r(pattern.substr(i + 1, end - i - 1)); |
108 | 64.8k | options.push_back(r); |
109 | 64.8k | i = end + 1; |
110 | 64.8k | } |
111 | | |
112 | 115k | return i; |
113 | 115k | } |
114 | | |
115 | | void PatternParser::parse( |
116 | | const LogString& pattern, |
117 | | std::vector<PatternConverterPtr>& patternConverters, |
118 | | std::vector<FormattingInfoPtr>& formattingInfos, |
119 | | const PatternMap& rules) |
120 | 3.39k | { |
121 | | |
122 | 3.39k | LogString currentLiteral; |
123 | | |
124 | 3.39k | size_t patternLength = pattern.length(); |
125 | 3.39k | int state = LITERAL_STATE; |
126 | 3.39k | logchar c; |
127 | 3.39k | size_t i = 0; |
128 | 3.39k | int minDigitCount{ 0 }, maxDigitCount{ 0 }; |
129 | 3.39k | FormattingInfoPtr formattingInfo(FormattingInfo::getDefault()); |
130 | | |
131 | 664k | while (i < patternLength) |
132 | 660k | { |
133 | 660k | c = pattern[i++]; |
134 | | |
135 | 660k | switch (state) |
136 | 660k | { |
137 | 516k | case LITERAL_STATE: |
138 | | |
139 | | // In literal state, the last char is always a literal. |
140 | 516k | if (i == patternLength) |
141 | 372 | { |
142 | 372 | currentLiteral.append(1, c); |
143 | | |
144 | 372 | continue; |
145 | 372 | } |
146 | | |
147 | 516k | if (c == ESCAPE_CHAR) |
148 | 124k | { |
149 | | // peek at the next char. |
150 | 124k | if (pattern[i] == ESCAPE_CHAR) |
151 | 1.72k | { |
152 | 1.72k | currentLiteral.append(1, c); |
153 | 1.72k | i++; // move pointer |
154 | 1.72k | } |
155 | 123k | else |
156 | 123k | { |
157 | 123k | if (!currentLiteral.empty()) |
158 | 31.4k | { |
159 | 31.4k | patternConverters.push_back( |
160 | 31.4k | LiteralPatternConverter::newInstance(currentLiteral)); |
161 | 31.4k | formattingInfos.push_back(FormattingInfo::getDefault()); |
162 | 31.4k | currentLiteral.erase(currentLiteral.begin(), currentLiteral.end()); |
163 | 31.4k | } |
164 | | |
165 | 123k | currentLiteral.append(1, c); // append % |
166 | 123k | state = CONVERTER_STATE; |
167 | 123k | formattingInfo = FormattingInfo::getDefault(); |
168 | 123k | } |
169 | 124k | } |
170 | 391k | else |
171 | 391k | { |
172 | 391k | currentLiteral.append(1, c); |
173 | 391k | } |
174 | | |
175 | 516k | break; |
176 | | |
177 | 133k | case CONVERTER_STATE: |
178 | 133k | currentLiteral.append(1, c); |
179 | | |
180 | 133k | switch (c) |
181 | 133k | { |
182 | 10.0k | case 0x2D: // '-' |
183 | 10.0k | formattingInfo = std::make_shared<FormattingInfo>( |
184 | 10.0k | true, formattingInfo->getMinLength(), |
185 | 10.0k | formattingInfo->getMaxLength()); |
186 | | |
187 | 10.0k | break; |
188 | | |
189 | 1.71k | case 0x2E: // '.' |
190 | 1.71k | state = DOT_STATE; |
191 | | |
192 | 1.71k | break; |
193 | | |
194 | 121k | default: |
195 | | |
196 | 121k | if ((c >= 0x30 /* '0' */) && (c <= 0x39 /* '9' */)) |
197 | 6.01k | { |
198 | 6.01k | formattingInfo = std::make_shared<FormattingInfo>( |
199 | 6.01k | formattingInfo->isLeftAligned(), c - 0x30 /* '0' */, |
200 | 6.01k | formattingInfo->getMaxLength()); |
201 | 6.01k | state = MIN_STATE; |
202 | 6.01k | minDigitCount = 1; |
203 | 6.01k | } |
204 | 115k | else |
205 | 115k | { |
206 | 115k | i = finalizeConverter( |
207 | 115k | c, pattern, i, currentLiteral, formattingInfo, |
208 | 115k | rules, patternConverters, formattingInfos); |
209 | | |
210 | | // Next pattern is assumed to be a literal. |
211 | 115k | state = LITERAL_STATE; |
212 | 115k | formattingInfo = FormattingInfo::getDefault(); |
213 | | |
214 | 115k | if (!currentLiteral.empty()) |
215 | 0 | { |
216 | 0 | currentLiteral.erase(currentLiteral.begin(), currentLiteral.end()); |
217 | 0 | } |
218 | 115k | } |
219 | 133k | } // switch |
220 | | |
221 | 133k | break; |
222 | | |
223 | 133k | case MIN_STATE: |
224 | 7.01k | currentLiteral.append(1, c); |
225 | | |
226 | 7.01k | if ((c >= 0x30 /* '0' */) && (c <= 0x39 /* '9' */) && minDigitCount < 3) |
227 | 1.03k | { |
228 | 1.03k | formattingInfo = std::make_shared<FormattingInfo>( |
229 | 1.03k | formattingInfo->isLeftAligned(), |
230 | 1.03k | (formattingInfo->getMinLength() * 10) + (c - 0x30 /* '0' */), |
231 | 1.03k | formattingInfo->getMaxLength()); |
232 | 1.03k | ++minDigitCount; |
233 | 1.03k | } |
234 | 5.97k | else if (c == 0x2E /* '.' */) |
235 | 491 | { |
236 | 491 | state = DOT_STATE; |
237 | 491 | } |
238 | 5.48k | else |
239 | 5.48k | { |
240 | 5.48k | i = finalizeConverter( |
241 | 5.48k | c, pattern, i, currentLiteral, formattingInfo, |
242 | 5.48k | rules, patternConverters, formattingInfos); |
243 | 5.48k | state = LITERAL_STATE; |
244 | 5.48k | formattingInfo = FormattingInfo::getDefault(); |
245 | | |
246 | 5.48k | if (!currentLiteral.empty()) |
247 | 0 | { |
248 | 0 | currentLiteral.erase(currentLiteral.begin(), currentLiteral.end()); |
249 | 0 | } |
250 | 5.48k | } |
251 | | |
252 | 7.01k | break; |
253 | | |
254 | 2.18k | case DOT_STATE: |
255 | 2.18k | currentLiteral.append(1, c); |
256 | | |
257 | 2.18k | if ((c >= 0x30 /* '0' */) && (c <= 0x39 /* '9' */)) |
258 | 1.47k | { |
259 | 1.47k | formattingInfo = std::make_shared<FormattingInfo>( |
260 | 1.47k | formattingInfo->isLeftAligned(), formattingInfo->getMinLength(), |
261 | 1.47k | c - 0x30 /* '0' */); |
262 | 1.47k | state = MAX_STATE; |
263 | 1.47k | maxDigitCount = 1; |
264 | 1.47k | } |
265 | 718 | else |
266 | 718 | { |
267 | 718 | LogLog::error(LOG4CXX_STR("Error in pattern, was expecting digit.")); |
268 | | |
269 | 718 | state = LITERAL_STATE; |
270 | 718 | } |
271 | | |
272 | 2.18k | break; |
273 | | |
274 | 2.25k | case MAX_STATE: |
275 | 2.25k | currentLiteral.append(1, c); |
276 | | |
277 | 2.25k | if ((c >= 0x30 /* '0' */) && (c <= 0x39 /* '9' */) && maxDigitCount < 3) |
278 | 828 | { |
279 | 828 | formattingInfo = std::make_shared<FormattingInfo>( |
280 | 828 | formattingInfo->isLeftAligned(), formattingInfo->getMinLength(), |
281 | 828 | (formattingInfo->getMaxLength() * 10) + (c - 0x30 /* '0' */)); |
282 | 828 | ++maxDigitCount; |
283 | 828 | } |
284 | 1.42k | else |
285 | 1.42k | { |
286 | 1.42k | i = finalizeConverter( |
287 | 1.42k | c, pattern, i, currentLiteral, formattingInfo, |
288 | 1.42k | rules, patternConverters, formattingInfos); |
289 | 1.42k | state = LITERAL_STATE; |
290 | 1.42k | formattingInfo = FormattingInfo::getDefault(); |
291 | | |
292 | 1.42k | if (!currentLiteral.empty()) |
293 | 0 | { |
294 | 0 | currentLiteral.erase(currentLiteral.begin(), currentLiteral.end()); |
295 | 0 | } |
296 | 1.42k | } |
297 | | |
298 | 2.25k | break; |
299 | 660k | } // switch |
300 | 660k | } |
301 | | |
302 | | // while |
303 | 3.39k | if (currentLiteral.length() != 0) |
304 | 516 | { |
305 | 516 | patternConverters.push_back( |
306 | 516 | LiteralPatternConverter::newInstance(currentLiteral)); |
307 | 516 | formattingInfos.push_back(FormattingInfo::getDefault()); |
308 | 516 | } |
309 | 3.39k | } |
310 | | |
311 | | |
312 | | PatternConverterPtr PatternParser::createConverter( |
313 | | const LogString& converterId, |
314 | | LogString& currentLiteral, |
315 | | const PatternMap& rules, |
316 | | std::vector<LogString>& options) |
317 | 115k | { |
318 | | |
319 | 115k | LogString converterName(converterId); |
320 | | |
321 | 557k | for (size_t i = converterId.length(); i > 0; i--) |
322 | 546k | { |
323 | 546k | converterName = converterName.substr(0, i); |
324 | 546k | PatternMap::const_iterator iter = rules.find(converterName); |
325 | | |
326 | 546k | if (iter != rules.end()) |
327 | 104k | { |
328 | 104k | currentLiteral.erase(currentLiteral.begin(), |
329 | 104k | currentLiteral.end() - (converterId.length() - i)); |
330 | 104k | return (iter->second)(options); |
331 | 104k | } |
332 | 546k | } |
333 | | |
334 | 11.3k | LogLog::error(LogString(LOG4CXX_STR("Unrecognized format specifier ")) + converterId); |
335 | | |
336 | 11.3k | return PatternConverterPtr(); |
337 | 115k | } |
338 | | |
339 | | size_t PatternParser::finalizeConverter( |
340 | | logchar c, const LogString& pattern, size_t i, |
341 | | LogString& currentLiteral, const FormattingInfoPtr& formattingInfo, |
342 | | const PatternMap& rules, |
343 | | std::vector<PatternConverterPtr>& patternConverters, |
344 | | std::vector<FormattingInfoPtr>& formattingInfos) |
345 | 122k | { |
346 | 122k | LogString convBuf; |
347 | 122k | i = extractConverter(c, pattern, i, convBuf, currentLiteral); |
348 | | |
349 | 122k | if (convBuf.empty()) |
350 | 6.40k | { |
351 | 6.40k | LogLog::error(LOG4CXX_STR("Empty conversion specifier")); |
352 | 6.40k | patternConverters.push_back( |
353 | 6.40k | LiteralPatternConverter::newInstance(currentLiteral)); |
354 | 6.40k | formattingInfos.push_back(FormattingInfo::getDefault()); |
355 | 6.40k | } |
356 | 115k | else |
357 | 115k | { |
358 | 115k | LogString converterId(convBuf); |
359 | | |
360 | 115k | std::vector<LogString> options; |
361 | 115k | i = extractOptions(pattern, i, options); |
362 | | |
363 | 115k | PatternConverterPtr pc( |
364 | 115k | createConverter( |
365 | 115k | converterId, currentLiteral, rules, options)); |
366 | | |
367 | 115k | if (pc == NULL) |
368 | 11.3k | { |
369 | 11.3k | LogString msg(LOG4CXX_STR("Unrecognized conversion specifier [")); |
370 | 11.3k | msg.append(converterId); |
371 | 11.3k | msg.append(LOG4CXX_STR("] in conversion pattern.")); |
372 | 11.3k | LogLog::error(msg); |
373 | 11.3k | patternConverters.push_back( |
374 | 11.3k | LiteralPatternConverter::newInstance(currentLiteral)); |
375 | 11.3k | formattingInfos.push_back(FormattingInfo::getDefault()); |
376 | 11.3k | } |
377 | 104k | else |
378 | 104k | { |
379 | 104k | patternConverters.push_back(pc); |
380 | 104k | formattingInfos.push_back(formattingInfo); |
381 | | |
382 | 104k | if (currentLiteral.length() > 0) |
383 | 31.6k | { |
384 | 31.6k | patternConverters.push_back( |
385 | 31.6k | LiteralPatternConverter::newInstance(currentLiteral)); |
386 | 31.6k | formattingInfos.push_back(FormattingInfo::getDefault()); |
387 | 31.6k | } |
388 | 104k | } |
389 | 115k | } |
390 | | |
391 | 122k | if (!currentLiteral.empty()) |
392 | 49.4k | { |
393 | 49.4k | currentLiteral.erase(currentLiteral.begin(), currentLiteral.end()); |
394 | 49.4k | } |
395 | | |
396 | 122k | return i; |
397 | 122k | } |