/proc/self/cwd/pw_tokenizer/decode.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2020 The Pigweed Authors |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not |
4 | | // use this file except in compliance with the License. You may obtain a copy of |
5 | | // the License at |
6 | | // |
7 | | // https://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
11 | | // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
12 | | // License for the specific language governing permissions and limitations under |
13 | | // the License. |
14 | | |
15 | | #include "pw_tokenizer/internal/decode.h" |
16 | | |
17 | | #include <algorithm> |
18 | | #include <array> |
19 | | #include <cctype> |
20 | | #include <cstring> |
21 | | #include <string> |
22 | | |
23 | | #include "pw_varint/varint.h" |
24 | | |
25 | | namespace pw::tokenizer { |
26 | | namespace { |
27 | | |
28 | | // Functions for parsing a printf format specifier. |
// Counts the printf flag characters ('-', '+', '#', ' ', '0') at the start of
// str. Scanning stops at the first character that is not a flag, which
// includes the null terminator.
size_t SkipFlags(const char* str) {
  const auto is_flag = [](char c) {
    switch (c) {
      case '-':
      case '+':
      case '#':
      case ' ':
      case '0':
        return true;
      default:
        return false;
    }
  };

  size_t count = 0;
  while (is_flag(str[count])) {
    ++count;
  }
  return count;
}
37 | | |
// Returns the number of characters taken up by a printf field width or
// precision at the start of str.
//
// A leading '*' counts as one character. Otherwise the result is the length of
// an optional sign ('-' or '+') followed by a run of decimal digits. The sign
// is counted even when no digits follow it. Returns 0 if str starts with
// neither.
size_t SkipAsteriskOrInteger(const char* str) {
  if (str[0] == '*') {
    return 1;
  }

  size_t i = (str[0] == '-' || str[0] == '+') ? 1 : 0;

  // std::isdigit has undefined behavior when passed a negative value other
  // than EOF, which is what a non-ASCII byte becomes when char is signed.
  // Convert to unsigned char before the call.
  while (std::isdigit(static_cast<unsigned char>(str[i])) != 0) {
    i += 1;
  }
  return i;
}
50 | | |
// Reads a printf length modifier from the start of str.
//
// Returns {'l', 'l'} or {'h', 'h'} for the two-character modifiers, {c, '\0'}
// for the single-character modifiers h, l, j, z, t, and L, and {'\0', '\0'}
// when str does not start with a length modifier.
std::array<char, 2> ReadLengthModifier(const char* str) {
  const char first = str[0];

  // Stop at the terminator before looking at str[1], which would be past the
  // end of the string. std::strchr also treats '\0' as a match, so it must not
  // be asked about the terminator either.
  if (first == '\0') {
    return {};
  }

  // Check for ll or hh.
  if ((first == 'l' || first == 'h') && str[1] == first) {
    return {first, first};
  }
  if (std::strchr("hljztL", first) != nullptr) {
    return {first};
  }
  return {};
}
61 | | |
62 | | // Returns the error message that is used in place of a decoded arg when an |
63 | | // error occurs. |
64 | | std::string ErrorMessage(ArgStatus status, |
65 | | std::string_view spec, |
66 | 0 | std::string_view value) { |
67 | 0 | const char* message; |
68 | 0 | if (status.HasError(ArgStatus::kSkipped)) { |
69 | 0 | message = "SKIPPED"; |
70 | 0 | } else if (status.HasError(ArgStatus::kMissing)) { |
71 | 0 | message = "MISSING"; |
72 | 0 | } else if (status.HasError(ArgStatus::kDecodeError)) { |
73 | 0 | message = "ERROR"; |
74 | 0 | } else { |
75 | 0 | message = "INTERNAL ERROR"; |
76 | 0 | } |
77 | |
|
78 | 0 | std::string result(PW_TOKENIZER_ARG_DECODING_ERROR_PREFIX); |
79 | 0 | result.append(spec); |
80 | 0 | result.push_back(' '); |
81 | 0 | result.append(message); |
82 | |
|
83 | 0 | if (!value.empty()) { |
84 | 0 | result.push_back(' '); |
85 | 0 | result.push_back('('); |
86 | 0 | result.append(value); |
87 | 0 | result.push_back(')'); |
88 | 0 | } |
89 | |
|
90 | 0 | result.append(PW_TOKENIZER_ARG_DECODING_ERROR_SUFFIX); |
91 | 0 | return result; |
92 | 0 | } |
93 | | |
94 | | } // namespace |
95 | | |
96 | | DecodedArg::DecodedArg(ArgStatus error, |
97 | | std::string_view spec, |
98 | | size_t raw_size_bytes, |
99 | | std::string_view value) |
100 | 0 | : value_(ErrorMessage(error, spec, value)), |
101 | 0 | spec_(spec), |
102 | 0 | raw_data_size_bytes_(raw_size_bytes), |
103 | 0 | status_(error) {} |
104 | | |
105 | 13 | StringSegment StringSegment::ParseFormatSpec(const char* format) { |
106 | 13 | if (format[0] != '%' || format[1] == '\0') { |
107 | 13 | return StringSegment(); |
108 | 13 | } |
109 | | |
110 | | // Parse the format specifier. |
111 | 0 | size_t i = 1; |
112 | | |
113 | | // Skip the flags. |
114 | 0 | i += SkipFlags(&format[i]); |
115 | | |
116 | | // Skip the field width. |
117 | 0 | i += SkipAsteriskOrInteger(&format[i]); |
118 | | |
119 | | // Skip the precision. |
120 | 0 | if (format[i] == '.') { |
121 | 0 | i += 1; |
122 | 0 | i += SkipAsteriskOrInteger(&format[i]); |
123 | 0 | } |
124 | | |
125 | | // Read the length modifier. |
126 | 0 | const std::array<char, 2> length = ReadLengthModifier(&format[i]); |
127 | 0 | i += (length[0] == '\0' ? 0 : 1) + (length[1] == '\0' ? 0 : 1); |
128 | | |
129 | | // Read the conversion specifier. |
130 | 0 | const char spec = format[i]; |
131 | |
|
132 | 0 | Type type; |
133 | 0 | if (spec == 's') { |
134 | 0 | type = kString; |
135 | 0 | } else if (spec == 'c' || spec == 'd' || spec == 'i') { |
136 | 0 | type = kSignedInt; |
137 | 0 | } else if (std::strchr("oxXup", spec) != nullptr) { |
138 | | // The source size matters for unsigned integers because they need to be |
139 | | // masked off to their correct length, since zig-zag decode sign extends. |
140 | | // TODO(hepler): 64-bit targets likely have 64-bit l, j, z, and t. Also, p |
141 | | // needs to be 64-bit on these targets. |
142 | 0 | type = length[0] == 'j' || length[1] == 'l' ? kUnsigned64 : kUnsigned32; |
143 | 0 | } else if (std::strchr("fFeEaAgG", spec) != nullptr) { |
144 | 0 | type = kFloatingPoint; |
145 | 0 | } else if (spec == '%' && i == 1) { |
146 | 0 | type = kPercent; |
147 | 0 | } else { |
148 | 0 | return StringSegment(); |
149 | 0 | } |
150 | | |
151 | 0 | return {std::string_view(format, i + 1), type, VarargSize(length, spec)}; |
152 | 0 | } |
153 | | |
154 | | StringSegment::ArgSize StringSegment::VarargSize(std::array<char, 2> length, |
155 | 0 | char spec) { |
156 | | // Use pointer size for %p or any other type (for which this doesn't matter). |
157 | 0 | if (std::strchr("cdioxXu", spec) == nullptr) { |
158 | 0 | return VarargSize<void*>(); |
159 | 0 | } |
160 | 0 | if (length[0] == 'l') { |
161 | 0 | return length[1] == 'l' ? VarargSize<long long>() : VarargSize<long>(); |
162 | 0 | } |
163 | 0 | if (length[0] == 'j') { |
164 | 0 | return VarargSize<intmax_t>(); |
165 | 0 | } |
166 | 0 | if (length[0] == 'z') { |
167 | 0 | return VarargSize<size_t>(); |
168 | 0 | } |
169 | 0 | if (length[0] == 't') { |
170 | 0 | return VarargSize<ptrdiff_t>(); |
171 | 0 | } |
172 | 0 | return VarargSize<int>(); |
173 | 0 | } |
174 | | |
175 | | DecodedArg StringSegment::DecodeString( |
176 | 0 | const span<const uint8_t>& arguments) const { |
177 | 0 | if (arguments.empty()) { |
178 | 0 | return DecodedArg(ArgStatus::kMissing, text_); |
179 | 0 | } |
180 | | |
181 | 0 | ArgStatus status = |
182 | 0 | (arguments[0] & 0x80u) == 0u ? ArgStatus::kOk : ArgStatus::kTruncated; |
183 | |
|
184 | 0 | const uint_fast8_t size = arguments[0] & 0x7Fu; |
185 | |
|
186 | 0 | if (arguments.size() - 1 < size) { |
187 | 0 | status.Update(ArgStatus::kDecodeError); |
188 | 0 | span<const uint8_t> arg_val = arguments.subspan(1); |
189 | 0 | return DecodedArg( |
190 | 0 | status, |
191 | 0 | text_, |
192 | 0 | arguments.size(), |
193 | 0 | {reinterpret_cast<const char*>(arg_val.data()), arg_val.size()}); |
194 | 0 | } |
195 | | |
196 | 0 | std::string value(reinterpret_cast<const char*>(arguments.data() + 1), size); |
197 | |
|
198 | 0 | if (status.HasError(ArgStatus::kTruncated)) { |
199 | 0 | value.append("[...]"); |
200 | 0 | } |
201 | |
|
202 | 0 | return DecodedArg::FromValue(text_.c_str(), value.c_str(), 1 + size, status); |
203 | 0 | } |
204 | | |
205 | | DecodedArg StringSegment::DecodeInteger( |
206 | 0 | const span<const uint8_t>& arguments) const { |
207 | 0 | if (arguments.empty()) { |
208 | 0 | return DecodedArg(ArgStatus::kMissing, text_); |
209 | 0 | } |
210 | | |
211 | 0 | int64_t value; |
212 | 0 | const size_t bytes = varint::Decode(as_bytes(arguments), &value); |
213 | |
|
214 | 0 | if (bytes == 0u) { |
215 | 0 | return DecodedArg(ArgStatus::kDecodeError, |
216 | 0 | text_, |
217 | 0 | std::min(varint::kMaxVarint64SizeBytes, |
218 | 0 | static_cast<size_t>(arguments.size()))); |
219 | 0 | } |
220 | | |
221 | | // Unsigned ints need to be masked to their bit width due to sign extension. |
222 | 0 | if (type_ == kUnsigned32) { |
223 | 0 | value &= 0xFFFFFFFFu; |
224 | 0 | } |
225 | |
|
226 | 0 | if (local_size_ == k32Bit) { |
227 | 0 | return DecodedArg::FromValue( |
228 | 0 | text_.c_str(), static_cast<uint32_t>(value), bytes); |
229 | 0 | } |
230 | 0 | return DecodedArg::FromValue(text_.c_str(), value, bytes); |
231 | 0 | } |
232 | | |
233 | | DecodedArg StringSegment::DecodeFloatingPoint( |
234 | 0 | const span<const uint8_t>& arguments) const { |
235 | 0 | static_assert(sizeof(float) == 4u); |
236 | 0 | if (arguments.size() < sizeof(float)) { |
237 | 0 | return DecodedArg(ArgStatus::kMissing, text_); |
238 | 0 | } |
239 | | |
240 | 0 | float value; |
241 | 0 | std::memcpy(&value, arguments.data(), sizeof(value)); |
242 | 0 | return DecodedArg::FromValue(text_.c_str(), value, sizeof(value)); |
243 | 0 | } |
244 | | |
245 | 2.35k | DecodedArg StringSegment::Decode(const span<const uint8_t>& arguments) const { |
246 | 2.35k | switch (type_) { |
247 | 2.35k | case kLiteral: |
248 | 2.35k | return DecodedArg(text_); |
249 | 0 | case kPercent: |
250 | 0 | return DecodedArg("%"); |
251 | 0 | case kString: |
252 | 0 | return DecodeString(arguments); |
253 | 0 | case kSignedInt: |
254 | 0 | case kUnsigned32: |
255 | 0 | case kUnsigned64: |
256 | 0 | return DecodeInteger(arguments); |
257 | 0 | case kFloatingPoint: |
258 | 0 | return DecodeFloatingPoint(arguments); |
259 | 2.35k | } |
260 | | |
261 | 0 | return DecodedArg(ArgStatus::kDecodeError, text_); |
262 | 2.35k | } |
263 | | |
264 | 0 | DecodedArg StringSegment::Skip() const { |
265 | 0 | switch (type_) { |
266 | 0 | case kLiteral: |
267 | 0 | return DecodedArg(text_); |
268 | 0 | case kPercent: |
269 | 0 | return DecodedArg("%"); |
270 | 0 | case kString: |
271 | 0 | case kSignedInt: |
272 | 0 | case kUnsigned32: |
273 | 0 | case kUnsigned64: |
274 | 0 | case kFloatingPoint: |
275 | 0 | default: |
276 | 0 | return DecodedArg(ArgStatus::kSkipped, text_); |
277 | 0 | } |
278 | 0 | } |
279 | | |
280 | 2.35k | std::string DecodedFormatString::value() const { |
281 | 2.35k | std::string output; |
282 | | |
283 | 2.35k | for (const DecodedArg& arg : segments_) { |
284 | 2.35k | output.append(arg.ok() ? arg.value() : arg.spec()); |
285 | 2.35k | } |
286 | | |
287 | 2.35k | return output; |
288 | 2.35k | } |
289 | | |
290 | 0 | std::string DecodedFormatString::value_with_errors() const { |
291 | 0 | std::string output; |
292 | |
|
293 | 0 | for (const DecodedArg& arg : segments_) { |
294 | 0 | output.append(arg.value()); |
295 | 0 | } |
296 | |
|
297 | 0 | return output; |
298 | 0 | } |
299 | | |
300 | 0 | size_t DecodedFormatString::argument_count() const { |
301 | 0 | return static_cast<size_t>( |
302 | 0 | std::count_if(segments_.begin(), segments_.end(), [](const auto& arg) { |
303 | 0 | return !arg.spec().empty(); |
304 | 0 | })); |
305 | 0 | } |
306 | | |
307 | 0 | size_t DecodedFormatString::decoding_errors() const { |
308 | 0 | return static_cast<size_t>( |
309 | 0 | std::count_if(segments_.begin(), segments_.end(), [](const auto& arg) { |
310 | 0 | return !arg.ok(); |
311 | 0 | })); |
312 | 0 | } |
313 | | |
314 | 4 | FormatString::FormatString(const char* format) { |
315 | 4 | const char* text_start = format; |
316 | | |
317 | 17 | while (format[0] != '\0') { |
318 | 13 | if (StringSegment spec = StringSegment::ParseFormatSpec(format); |
319 | 13 | !spec.empty()) { |
320 | | // Add the text segment seen so far (if any). |
321 | 0 | if (text_start < format) { |
322 | 0 | segments_.emplace_back(std::string_view( |
323 | 0 | text_start, static_cast<size_t>(format - text_start))); |
324 | 0 | } |
325 | | |
326 | | // Move along the index and text segment start. |
327 | 0 | format += spec.text().size(); |
328 | 0 | text_start = format; |
329 | | |
330 | | // Add the format specifier that was just found. |
331 | 0 | segments_.push_back(std::move(spec)); |
332 | 13 | } else { |
333 | 13 | format += 1; |
334 | 13 | } |
335 | 13 | } |
336 | | |
337 | 4 | if (text_start < format) { |
338 | 4 | segments_.emplace_back( |
339 | 4 | std::string_view(text_start, static_cast<size_t>(format - text_start))); |
340 | 4 | } |
341 | 4 | } |
342 | | |
343 | 2.35k | DecodedFormatString FormatString::Format(span<const uint8_t> arguments) const { |
344 | 2.35k | std::vector<DecodedArg> results; |
345 | 2.35k | bool skip = false; |
346 | | |
347 | 2.35k | for (const auto& segment : segments_) { |
348 | 2.35k | if (skip) { |
349 | 0 | results.push_back(segment.Skip()); |
350 | 2.35k | } else { |
351 | 2.35k | results.push_back(segment.Decode(arguments)); |
352 | 2.35k | arguments = arguments.subspan(results.back().raw_size_bytes()); |
353 | | |
354 | | // If an error occurred, skip decoding the remaining arguments. |
355 | 2.35k | if (!results.back().ok()) { |
356 | 0 | skip = true; |
357 | 0 | } |
358 | 2.35k | } |
359 | 2.35k | } |
360 | | |
361 | 2.35k | return DecodedFormatString(std::move(results), arguments.size()); |
362 | 2.35k | } |
363 | | |
364 | | } // namespace pw::tokenizer |