Coverage Report

Created: 2025-11-25 07:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libprotobuf-mutator/src/utf8_fix.cc
Line
Count
Source
1
// Copyright 2017 Google Inc. All rights reserved.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//     http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
#include "src/utf8_fix.h"
16
17
#include <algorithm>
18
#include <cassert>
19
20
namespace protobuf_mutator {
21
22
namespace {
23
24
0
// Writes `code` as a `size`-byte UTF-8 style sequence occupying
// [e - size, e), i.e. ending immediately before `e`.
//
// Bytes are emitted back to front: each of the trailing `size - 1` bytes is a
// continuation byte (0x80 plus the next six low bits of `code`), and the
// leading byte is `prefix` OR-ed with whatever bits of `code` remain.
//
// `size` must be at least 1. The caller is responsible for making sure that
// the remaining bits of `code` fit into the payload of the leading byte.
void StoreCode(char* e, char32_t code, uint8_t size, uint8_t prefix) {
  assert(size > 0 && "UTF-8 sequence must contain at least one byte");
  // Test `size > 1` before decrementing: a pre-decrement of an unsigned zero
  // would wrap to 255 and write far in front of the buffer.
  for (; size > 1; --size) {
    *(--e) = static_cast<char>(0x80 | (code & 0x3F));
    code >>= 6;
  }
  *(--e) = static_cast<char>(prefix | code);
}
31
32
0
char* FixCode(char* b, const char* e, RandomEngine* random) {
33
0
  const char* start = b;
34
0
  assert(b < e);
35
36
0
  e = std::min<const char*>(e, b + 4);
37
0
  char32_t c = *b++;
38
0
  for (; b < e && (*b & 0xC0) == 0x80; ++b) {
39
0
    c = (c << 6) + (*b & 0x3F);
40
0
  }
41
0
  uint8_t size = b - start;
42
0
  switch (size) {
43
0
    case 1:
44
0
      c &= 0x7F;
45
0
      StoreCode(b, c, size, 0);
46
0
      break;
47
0
    case 2:
48
0
      c &= 0x7FF;
49
0
      if (c < 0x80) {
50
        // Use uint32_t because uniform_int_distribution does not support
51
        // char32_t on Windows.
52
0
        c = std::uniform_int_distribution<uint32_t>(0x80, 0x7FF)(*random);
53
0
      }
54
0
      StoreCode(b, c, size, 0xC0);
55
0
      break;
56
0
    case 3:
57
0
      c &= 0xFFFF;
58
59
      // [0xD800, 0xE000) are reserved for UTF-16 surrogate halves.
60
0
      if (c < 0x800 || (c >= 0xD800 && c < 0xE000)) {
61
0
        uint32_t halves = 0xE000 - 0xD800;
62
0
        c = std::uniform_int_distribution<uint32_t>(0x800,
63
0
                                                    0xFFFF - halves)(*random);
64
0
        if (c >= 0xD800) c += halves;
65
0
      }
66
0
      StoreCode(b, c, size, 0xE0);
67
0
      break;
68
0
    case 4:
69
0
      c &= 0x1FFFFF;
70
0
      if (c < 0x10000 || c > 0x10FFFF) {
71
0
        c = std::uniform_int_distribution<uint32_t>(0x10000, 0x10FFFF)(*random);
72
0
      }
73
0
      StoreCode(b, c, size, 0xF0);
74
0
      break;
75
0
    default:
76
0
      assert(false && "Unexpected size of UTF-8 sequence");
77
0
  }
78
0
  return b;
79
0
}
80
81
}  // namespace
82
83
0
void FixUtf8String(std::string* str, RandomEngine* random) {
84
0
  if (str->empty()) return;
85
0
  char* b = &(*str)[0];
86
0
  const char* e = b + str->size();
87
0
  while (b < e) {
88
0
    b = FixCode(b, e, random);
89
0
  }
90
0
}
91
92
}  // namespace protobuf_mutator