Coverage Report

Created: 2026-06-15 07:04

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/boringssl/crypto/bytestring/unicode.cc
Line
Count
Source
1
// Copyright 2018 The BoringSSL Authors
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//     https://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
#include <openssl/bytestring.h>
16
17
#include "internal.h"
18
19
20
29.4M
// is_valid_code_point returns one if `v` is a Unicode code point that this
// file accepts for interchange, and zero otherwise.
//
// References in the following are to Unicode 15.0.0.
static int is_valid_code_point(uint32_t v) {
  // The Unicode space runs from zero to 0x10ffff (3.4 D9).
  if (v > 0x10ffff) {
    return 0;
  }

  // Surrogate code points are invalid (3.2 C1).
  if (v >= 0xd800 && v <= 0xdfff) {
    return 0;
  }

  // Values 0xfdd0-0xfdef and 0x...fffe, 0x...ffff are permanently reserved as
  // noncharacters (3.4 D14). See also 23.7. As our APIs are intended for
  // "open interchange", such as ASN.1, we reject them.
  if (v >= 0xfdd0 && v <= 0xfdef) {
    return 0;
  }
  // Masking off the lowest bit matches both ...fffe and ...ffff in every plane.
  if ((v & 0xfffe) == 0xfffe) {
    return 0;
  }

  return 1;
}
35
36
// BOTTOM_BITS returns a byte with the bottom `n` bits set. The expansion is
// wrapped in parentheses so that it always acts as a single operand, whatever
// operator it is placed next to.
#define BOTTOM_BITS(n) ((uint8_t)((1u << (n)) - 1))

// TOP_BITS returns a byte with the top `n` bits set. `n` must be at most 8,
// because it is subtracted from 8 to form the shift count in BOTTOM_BITS.
#define TOP_BITS(n) ((uint8_t)~BOTTOM_BITS(8 - (n)))
41
42
8.10M
// CBS_get_utf8 decodes one UTF-8 encoded code point, reading its bytes from
// `cbs` with CBS_get_u8. On success it stores the code point in `*out` and
// returns one. It returns zero, without writing `*out`, if a byte cannot be
// read, the first byte is not a valid lead byte, a continuation byte is
// malformed, the encoding is longer than necessary, or the decoded value is
// rejected by is_valid_code_point.
int CBS_get_utf8(CBS *cbs, uint32_t *out) {
  uint8_t lead;
  if (!CBS_get_u8(cbs, &lead)) {
    return 0;
  }

  // A clear top bit means the byte is the entire code point.
  if (lead < 0x80) {
    *out = lead;
    return 1;
  }

  // Classify the lead byte. Its prefix determines how many continuation bytes
  // follow, and its remaining low bits seed the decoded value. `minimum` is
  // the smallest value that requires a sequence of this length; anything
  // smaller is an overlong encoding.
  uint32_t code_point;
  uint32_t minimum;
  size_t continuation_bytes;
  if ((lead & TOP_BITS(3)) == TOP_BITS(2)) {
    // 110xxxxx: two-byte sequence.
    continuation_bytes = 1;
    minimum = 0x80;
    code_point = lead & BOTTOM_BITS(5);
  } else if ((lead & TOP_BITS(4)) == TOP_BITS(3)) {
    // 1110xxxx: three-byte sequence.
    continuation_bytes = 2;
    minimum = 0x800;
    code_point = lead & BOTTOM_BITS(4);
  } else if ((lead & TOP_BITS(5)) == TOP_BITS(4)) {
    // 11110xxx: four-byte sequence.
    continuation_bytes = 3;
    minimum = 0x10000;
    code_point = lead & BOTTOM_BITS(3);
  } else {
    // Either a stray continuation byte or a prefix longer than four bytes.
    return 0;
  }

  while (continuation_bytes > 0) {
    uint8_t next;
    if (!CBS_get_u8(cbs, &next)) {
      return 0;
    }
    // Every continuation byte must look like 10xxxxxx.
    if ((next & TOP_BITS(2)) != TOP_BITS(1)) {
      return 0;
    }
    // Append the six payload bits.
    code_point = (code_point << 6) | (next & BOTTOM_BITS(6));
    continuation_bytes--;
  }

  if (code_point < minimum) {
    return 0;
  }
  if (!is_valid_code_point(code_point)) {
    return 0;
  }

  *out = code_point;
  return 1;
}
83
84
3.69M
// CBS_get_latin1 reads a single byte from `cbs` with CBS_get_u8 and stores its
// value, unchanged, in `*out`. It returns one on success and zero, without
// writing `*out`, if the byte cannot be read.
int CBS_get_latin1(CBS *cbs, uint32_t *out) {
  uint8_t byte;
  if (CBS_get_u8(cbs, &byte)) {
    // The byte value is used directly as the code point.
    *out = byte;
    return 1;
  }
  return 0;
}
92
93
13.3M
// CBS_get_ucs2_be reads one 16-bit unit from `cbs` with CBS_get_u16 and, if
// is_valid_code_point accepts it, stores it in `*out` and returns one. It
// returns zero, without writing `*out`, if the read fails or the value is
// rejected.
//
// Note UCS-2 (used by BMPString) does not support surrogates, so a surrogate
// value is an error rather than half of a pair.
int CBS_get_ucs2_be(CBS *cbs, uint32_t *out) {
  uint16_t unit;
  if (!CBS_get_u16(cbs, &unit)) {
    return 0;
  }
  if (!is_valid_code_point(unit)) {
    return 0;
  }
  *out = unit;
  return 1;
}
103
104
25.8k
// CBS_get_utf32_be reads one 32-bit value from `cbs` into `*out` with
// CBS_get_u32 and returns one if the read succeeds and is_valid_code_point
// accepts the value, or zero otherwise. `out` is handed straight to
// CBS_get_u32, so `*out` may have been modified even when zero is returned.
int CBS_get_utf32_be(CBS *cbs, uint32_t *out) {
  if (!CBS_get_u32(cbs, out)) {
    return 0;
  }
  return is_valid_code_point(*out);
}
107
108
0
// CBB_get_utf8_len returns the number of bytes, from one to four, in the UTF-8
// encoding of `u`. It does not check that `u` is a valid code point; every
// value above 0xffff yields four.
size_t CBB_get_utf8_len(uint32_t u) {
  // Start from a single byte and add one for each length threshold exceeded.
  size_t length = 1;
  if (u > 0x7f) {
    length++;
  }
  if (u > 0x7ff) {
    length++;
  }
  if (u > 0xffff) {
    length++;
  }
  return length;
}
120
121
16.0M
// CBB_add_utf8 writes the UTF-8 encoding of `u` to `cbb`, one byte at a time,
// with CBB_add_u8. It returns zero if is_valid_code_point rejects `u`, or as
// soon as a byte fails to be written; bytes written before that failure are
// not undone here. For a multi-byte encoding it returns one when every byte
// was written; for a single-byte encoding it returns the result of CBB_add_u8.
int CBB_add_utf8(CBB *cbb, uint32_t u) {
  if (!is_valid_code_point(u)) {
    return 0;
  }

  // Code points up to 0x7f are encoded as themselves.
  if (u < 0x80) {
    return CBB_add_u8(cbb, (uint8_t)u);
  }

  // Choose the lead byte and the number of continuation bytes. The lead byte
  // carries a length prefix followed by the most significant payload bits.
  uint8_t lead;
  unsigned continuation_bytes;
  if (u < 0x800) {
    lead = (uint8_t)(TOP_BITS(2) | (u >> 6));
    continuation_bytes = 1;
  } else if (u < 0x10000) {
    lead = (uint8_t)(TOP_BITS(3) | (u >> 12));
    continuation_bytes = 2;
  } else if (u <= 0x10ffff) {
    lead = (uint8_t)(TOP_BITS(4) | (u >> 18));
    continuation_bytes = 3;
  } else {
    // is_valid_code_point already rejects these values; kept as a backstop.
    return 0;
  }

  if (!CBB_add_u8(cbb, lead)) {
    return 0;
  }

  // Emit the continuation bytes from most to least significant. Each one is
  // 10xxxxxx, carrying the next six payload bits.
  for (unsigned remaining = continuation_bytes; remaining > 0; remaining--) {
    const unsigned shift = 6 * (remaining - 1);
    const uint8_t continuation =
        (uint8_t)(TOP_BITS(1) | ((u >> shift) & BOTTOM_BITS(6)));
    if (!CBB_add_u8(cbb, continuation)) {
      return 0;
    }
  }
  return 1;
}
145
146
0
// CBB_add_latin1 writes `u` to `cbb` as a single byte with CBB_add_u8. It
// returns one on success, and zero if `u` is greater than 0xff or the write
// fails.
int CBB_add_latin1(CBB *cbb, uint32_t u) {
  // Only values that fit in one byte can be represented.
  if (u > 0xff) {
    return 0;
  }
  return CBB_add_u8(cbb, (uint8_t)u) ? 1 : 0;
}
149
150
1.88k
// CBB_add_ucs2_be writes `u` to `cbb` as a 16-bit value with CBB_add_u16. It
// returns one on success, and zero if `u` is greater than 0xffff, is rejected
// by is_valid_code_point, or cannot be written.
int CBB_add_ucs2_be(CBB *cbb, uint32_t u) {
  // Only values that fit in sixteen bits can be represented.
  if (u > 0xffff) {
    return 0;
  }
  if (!is_valid_code_point(u)) {
    return 0;
  }
  return CBB_add_u16(cbb, (uint16_t)u) ? 1 : 0;
}
153
154
0
// CBB_add_utf32_be writes `u` to `cbb` as a 32-bit value with CBB_add_u32. It
// returns one on success, and zero if `u` is rejected by is_valid_code_point
// or cannot be written.
int CBB_add_utf32_be(CBB *cbb, uint32_t u) {
  if (!is_valid_code_point(u)) {
    return 0;
  }
  return CBB_add_u32(cbb, u) ? 1 : 0;
}