Coverage Report

Created: 2025-06-13 06:30

/proc/self/cwd/pw_tokenizer/decode.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2020 The Pigweed Authors
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
4
// use this file except in compliance with the License. You may obtain a copy of
5
// the License at
6
//
7
//     https://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12
// License for the specific language governing permissions and limitations under
13
// the License.
14
15
#include "pw_tokenizer/internal/decode.h"
16
17
#include <algorithm>
18
#include <array>
19
#include <cctype>
20
#include <cstring>
21
#include <string>
22
23
#include "pw_varint/varint.h"
24
25
namespace pw::tokenizer {
26
namespace {
27
28
// Functions for parsing a printf format specifier.
29
0
size_t SkipFlags(const char* str) {
30
0
  size_t i = 0;
31
0
  while (str[i] == '-' || str[i] == '+' || str[i] == '#' || str[i] == ' ' ||
32
0
         str[i] == '0') {
33
0
    i += 1;
34
0
  }
35
0
  return i;
36
0
}
37
38
0
size_t SkipAsteriskOrInteger(const char* str) {
39
0
  if (str[0] == '*') {
40
0
    return 1;
41
0
  }
42
43
0
  size_t i = (str[0] == '-' || str[0] == '+') ? 1 : 0;
44
45
0
  while (std::isdigit(str[i])) {
46
0
    i += 1;
47
0
  }
48
0
  return i;
49
0
}
50
51
0
std::array<char, 2> ReadLengthModifier(const char* str) {
52
  // Check for ll or hh.
53
0
  if (str[0] == str[1] && (str[0] == 'l' || str[0] == 'h')) {
54
0
    return {str[0], str[1]};
55
0
  }
56
0
  if (std::strchr("hljztL", str[0]) != nullptr) {
57
0
    return {str[0]};
58
0
  }
59
0
  return {};
60
0
}
61
62
// Returns the error message that is used in place of a decoded arg when an
63
// error occurs.
64
std::string ErrorMessage(ArgStatus status,
65
                         std::string_view spec,
66
0
                         std::string_view value) {
67
0
  const char* message;
68
0
  if (status.HasError(ArgStatus::kSkipped)) {
69
0
    message = "SKIPPED";
70
0
  } else if (status.HasError(ArgStatus::kMissing)) {
71
0
    message = "MISSING";
72
0
  } else if (status.HasError(ArgStatus::kDecodeError)) {
73
0
    message = "ERROR";
74
0
  } else {
75
0
    message = "INTERNAL ERROR";
76
0
  }
77
78
0
  std::string result(PW_TOKENIZER_ARG_DECODING_ERROR_PREFIX);
79
0
  result.append(spec);
80
0
  result.push_back(' ');
81
0
  result.append(message);
82
83
0
  if (!value.empty()) {
84
0
    result.push_back(' ');
85
0
    result.push_back('(');
86
0
    result.append(value);
87
0
    result.push_back(')');
88
0
  }
89
90
0
  result.append(PW_TOKENIZER_ARG_DECODING_ERROR_SUFFIX);
91
0
  return result;
92
0
}
93
94
}  // namespace
95
96
DecodedArg::DecodedArg(ArgStatus error,
97
                       std::string_view spec,
98
                       size_t raw_size_bytes,
99
                       std::string_view value)
100
0
    : value_(ErrorMessage(error, spec, value)),
101
0
      spec_(spec),
102
0
      raw_data_size_bytes_(raw_size_bytes),
103
0
      status_(error) {}
104
105
13
StringSegment StringSegment::ParseFormatSpec(const char* format) {
106
13
  if (format[0] != '%' || format[1] == '\0') {
107
13
    return StringSegment();
108
13
  }
109
110
  // Parse the format specifier.
111
0
  size_t i = 1;
112
113
  // Skip the flags.
114
0
  i += SkipFlags(&format[i]);
115
116
  // Skip the field width.
117
0
  i += SkipAsteriskOrInteger(&format[i]);
118
119
  // Skip the precision.
120
0
  if (format[i] == '.') {
121
0
    i += 1;
122
0
    i += SkipAsteriskOrInteger(&format[i]);
123
0
  }
124
125
  // Read the length modifier.
126
0
  const std::array<char, 2> length = ReadLengthModifier(&format[i]);
127
0
  i += (length[0] == '\0' ? 0 : 1) + (length[1] == '\0' ? 0 : 1);
128
129
  // Read the conversion specifier.
130
0
  const char spec = format[i];
131
132
0
  Type type;
133
0
  if (spec == 's') {
134
0
    type = kString;
135
0
  } else if (spec == 'c' || spec == 'd' || spec == 'i') {
136
0
    type = kSignedInt;
137
0
  } else if (std::strchr("oxXup", spec) != nullptr) {
138
    // The source size matters for unsigned integers because they need to be
139
    // masked off to their correct length, since zig-zag decode sign extends.
140
    // TODO(hepler): 64-bit targets likely have 64-bit l, j, z, and t. Also, p
141
    // needs to be 64-bit on these targets.
142
0
    type = length[0] == 'j' || length[1] == 'l' ? kUnsigned64 : kUnsigned32;
143
0
  } else if (std::strchr("fFeEaAgG", spec) != nullptr) {
144
0
    type = kFloatingPoint;
145
0
  } else if (spec == '%' && i == 1) {
146
0
    type = kPercent;
147
0
  } else {
148
0
    return StringSegment();
149
0
  }
150
151
0
  return {std::string_view(format, i + 1), type, VarargSize(length, spec)};
152
0
}
153
154
StringSegment::ArgSize StringSegment::VarargSize(std::array<char, 2> length,
155
0
                                                 char spec) {
156
  // Use pointer size for %p or any other type (for which this doesn't matter).
157
0
  if (std::strchr("cdioxXu", spec) == nullptr) {
158
0
    return VarargSize<void*>();
159
0
  }
160
0
  if (length[0] == 'l') {
161
0
    return length[1] == 'l' ? VarargSize<long long>() : VarargSize<long>();
162
0
  }
163
0
  if (length[0] == 'j') {
164
0
    return VarargSize<intmax_t>();
165
0
  }
166
0
  if (length[0] == 'z') {
167
0
    return VarargSize<size_t>();
168
0
  }
169
0
  if (length[0] == 't') {
170
0
    return VarargSize<ptrdiff_t>();
171
0
  }
172
0
  return VarargSize<int>();
173
0
}
174
175
DecodedArg StringSegment::DecodeString(
176
0
    const span<const uint8_t>& arguments) const {
177
0
  if (arguments.empty()) {
178
0
    return DecodedArg(ArgStatus::kMissing, text_);
179
0
  }
180
181
0
  ArgStatus status =
182
0
      (arguments[0] & 0x80u) == 0u ? ArgStatus::kOk : ArgStatus::kTruncated;
183
184
0
  const uint_fast8_t size = arguments[0] & 0x7Fu;
185
186
0
  if (arguments.size() - 1 < size) {
187
0
    status.Update(ArgStatus::kDecodeError);
188
0
    span<const uint8_t> arg_val = arguments.subspan(1);
189
0
    return DecodedArg(
190
0
        status,
191
0
        text_,
192
0
        arguments.size(),
193
0
        {reinterpret_cast<const char*>(arg_val.data()), arg_val.size()});
194
0
  }
195
196
0
  std::string value(reinterpret_cast<const char*>(arguments.data() + 1), size);
197
198
0
  if (status.HasError(ArgStatus::kTruncated)) {
199
0
    value.append("[...]");
200
0
  }
201
202
0
  return DecodedArg::FromValue(text_.c_str(), value.c_str(), 1 + size, status);
203
0
}
204
205
DecodedArg StringSegment::DecodeInteger(
206
0
    const span<const uint8_t>& arguments) const {
207
0
  if (arguments.empty()) {
208
0
    return DecodedArg(ArgStatus::kMissing, text_);
209
0
  }
210
211
0
  int64_t value;
212
0
  const size_t bytes = varint::Decode(as_bytes(arguments), &value);
213
214
0
  if (bytes == 0u) {
215
0
    return DecodedArg(ArgStatus::kDecodeError,
216
0
                      text_,
217
0
                      std::min(varint::kMaxVarint64SizeBytes,
218
0
                               static_cast<size_t>(arguments.size())));
219
0
  }
220
221
  // Unsigned ints need to be masked to their bit width due to sign extension.
222
0
  if (type_ == kUnsigned32) {
223
0
    value &= 0xFFFFFFFFu;
224
0
  }
225
226
0
  if (local_size_ == k32Bit) {
227
0
    return DecodedArg::FromValue(
228
0
        text_.c_str(), static_cast<uint32_t>(value), bytes);
229
0
  }
230
0
  return DecodedArg::FromValue(text_.c_str(), value, bytes);
231
0
}
232
233
DecodedArg StringSegment::DecodeFloatingPoint(
234
0
    const span<const uint8_t>& arguments) const {
235
0
  static_assert(sizeof(float) == 4u);
236
0
  if (arguments.size() < sizeof(float)) {
237
0
    return DecodedArg(ArgStatus::kMissing, text_);
238
0
  }
239
240
0
  float value;
241
0
  std::memcpy(&value, arguments.data(), sizeof(value));
242
0
  return DecodedArg::FromValue(text_.c_str(), value, sizeof(value));
243
0
}
244
245
2.35k
DecodedArg StringSegment::Decode(const span<const uint8_t>& arguments) const {
246
2.35k
  switch (type_) {
247
2.35k
    case kLiteral:
248
2.35k
      return DecodedArg(text_);
249
0
    case kPercent:
250
0
      return DecodedArg("%");
251
0
    case kString:
252
0
      return DecodeString(arguments);
253
0
    case kSignedInt:
254
0
    case kUnsigned32:
255
0
    case kUnsigned64:
256
0
      return DecodeInteger(arguments);
257
0
    case kFloatingPoint:
258
0
      return DecodeFloatingPoint(arguments);
259
2.35k
  }
260
261
0
  return DecodedArg(ArgStatus::kDecodeError, text_);
262
2.35k
}
263
264
0
DecodedArg StringSegment::Skip() const {
265
0
  switch (type_) {
266
0
    case kLiteral:
267
0
      return DecodedArg(text_);
268
0
    case kPercent:
269
0
      return DecodedArg("%");
270
0
    case kString:
271
0
    case kSignedInt:
272
0
    case kUnsigned32:
273
0
    case kUnsigned64:
274
0
    case kFloatingPoint:
275
0
    default:
276
0
      return DecodedArg(ArgStatus::kSkipped, text_);
277
0
  }
278
0
}
279
280
2.35k
std::string DecodedFormatString::value() const {
281
2.35k
  std::string output;
282
283
2.35k
  for (const DecodedArg& arg : segments_) {
284
2.35k
    output.append(arg.ok() ? arg.value() : arg.spec());
285
2.35k
  }
286
287
2.35k
  return output;
288
2.35k
}
289
290
0
std::string DecodedFormatString::value_with_errors() const {
291
0
  std::string output;
292
293
0
  for (const DecodedArg& arg : segments_) {
294
0
    output.append(arg.value());
295
0
  }
296
297
0
  return output;
298
0
}
299
300
0
size_t DecodedFormatString::argument_count() const {
301
0
  return static_cast<size_t>(
302
0
      std::count_if(segments_.begin(), segments_.end(), [](const auto& arg) {
303
0
        return !arg.spec().empty();
304
0
      }));
305
0
}
306
307
0
size_t DecodedFormatString::decoding_errors() const {
308
0
  return static_cast<size_t>(
309
0
      std::count_if(segments_.begin(), segments_.end(), [](const auto& arg) {
310
0
        return !arg.ok();
311
0
      }));
312
0
}
313
314
4
FormatString::FormatString(const char* format) {
315
4
  const char* text_start = format;
316
317
17
  while (format[0] != '\0') {
318
13
    if (StringSegment spec = StringSegment::ParseFormatSpec(format);
319
13
        !spec.empty()) {
320
      // Add the text segment seen so far (if any).
321
0
      if (text_start < format) {
322
0
        segments_.emplace_back(std::string_view(
323
0
            text_start, static_cast<size_t>(format - text_start)));
324
0
      }
325
326
      // Move along the index and text segment start.
327
0
      format += spec.text().size();
328
0
      text_start = format;
329
330
      // Add the format specifier that was just found.
331
0
      segments_.push_back(std::move(spec));
332
13
    } else {
333
13
      format += 1;
334
13
    }
335
13
  }
336
337
4
  if (text_start < format) {
338
4
    segments_.emplace_back(
339
4
        std::string_view(text_start, static_cast<size_t>(format - text_start)));
340
4
  }
341
4
}
342
343
2.35k
DecodedFormatString FormatString::Format(span<const uint8_t> arguments) const {
344
2.35k
  std::vector<DecodedArg> results;
345
2.35k
  bool skip = false;
346
347
2.35k
  for (const auto& segment : segments_) {
348
2.35k
    if (skip) {
349
0
      results.push_back(segment.Skip());
350
2.35k
    } else {
351
2.35k
      results.push_back(segment.Decode(arguments));
352
2.35k
      arguments = arguments.subspan(results.back().raw_size_bytes());
353
354
      // If an error occurred, skip decoding the remaining arguments.
355
2.35k
      if (!results.back().ok()) {
356
0
        skip = true;
357
0
      }
358
2.35k
    }
359
2.35k
  }
360
361
2.35k
  return DecodedFormatString(std::move(results), arguments.size());
362
2.35k
}
363
364
}  // namespace pw::tokenizer