Coverage Report

Created: 2022-08-24 06:55

/src/solidity/libsolutil/UTF8.cpp
Line
Count
Source (jump to first uncovered line)
/*
  This file is part of solidity.

  solidity is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  solidity is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with solidity.  If not, see <http://www.gnu.org/licenses/>.
*/
// SPDX-License-Identifier: GPL-3.0
/** @file UTF8.cpp
 * @author Alex Beregszaszi
 * @date 2016
 *
 * UTF-8 related helpers
 */
25
#include <libsolutil/UTF8.h>
26
27
namespace solidity::util
28
{
29
namespace
{

/// Validate the first two bytes of a multi-byte sequence against
/// Unicode chapter 3 Table 3-7.
///
/// @param byte1 lead byte of the sequence.
/// @param byte2 the byte immediately following the lead byte.
/// @returns true if the pair is allowed to start a sequence, false otherwise.
///
/// Only the lead bytes 0xe0, 0xed, 0xf0 and 0xf4 restrict the second byte;
/// every other lead byte in 0xc2 .. 0xf4 accepts any second byte here.
bool isWellFormed(unsigned char byte1, unsigned char byte2)
{
  // 0xc0 and 0xc1 are never valid lead bytes and 0xf5 .. 0xf7 are disallowed.
  // Technically anything below 0xc0 or above 0xf7 is not possible to encode
  // using Table 3-6 anyway.
  if (byte1 < 0xc2 || byte1 > 0xf4)
    return false;

  switch (byte1)
  {
  case 0xe0:
    return byte2 >= 0xa0;
  case 0xed:
    return byte2 <= 0x9f;
  case 0xf0:
    return byte2 >= 0x90;
  case 0xf4:
    return byte2 <= 0x8f;
  default:
    return true;
  }
}

/// Check that the byte range [_input, _input + _length) is well-formed UTF-8.
///
/// @param _input pointer to the first byte; only dereferenced for indices below @a _length.
/// @param _length number of bytes to validate.
/// @param _invalidPosition set only on failure, to the index of the byte at which
///        validation stopped: the lead byte for an illegal or truncated sequence,
///        otherwise the first trailing byte that is not acceptable.
/// @returns true if the whole range is valid, false otherwise.
bool validateUTF8(unsigned char const* _input, size_t _length, size_t& _invalidPosition)
{
  for (size_t i = 0; i < _length; i++)
  {
    // Check for Unicode Chapter 3 Table 3-6 conformity.
    if (_input[i] < 0x80)
      continue;

    // Number of trailing bytes announced by the lead byte.
    size_t count = 0;
    if (_input[i] >= 0xc0 && _input[i] <= 0xdf)
      count = 1;
    else if (_input[i] >= 0xe0 && _input[i] <= 0xef)
      count = 2;
    else if (_input[i] >= 0xf0 && _input[i] <= 0xf7)
      count = 3;

    // count == 0: stray continuation byte (0x80 .. 0xbf) or lead byte above 0xf7.
    // i + count >= _length: the sequence would run past the end of the input.
    if (count == 0 || (i + count) >= _length)
    {
      _invalidPosition = i;
      return false;
    }

    for (size_t j = 0; j < count; j++)
    {
      i++;

      bool const isContinuation = (_input[i] & 0xc0) == 0x80;
      // Check for Unicode Chapter 3 Table 3-7 conformity (second byte only).
      if (!isContinuation || (j == 0 && !isWellFormed(_input[i - 1], _input[i])))
      {
        // Stop right here so the reported position is the offending byte;
        // continuing the outer scan would move the index past it.
        _invalidPosition = i;
        return false;
      }
    }
  }

  return true;
}

}
136
137
bool validateUTF8(std::string const& _input, size_t& _invalidPosition)
138
0
{
139
0
  return validateUTF8(reinterpret_cast<unsigned char const*>(_input.c_str()), _input.length(), _invalidPosition);
140
0
}
141
142
}