Coverage Report

Created: 2026-01-10 06:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/bind9/lib/isc/utf8.c
Line
Count
Source
1
/*
2
 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
3
 *
4
 * SPDX-License-Identifier: MPL-2.0
5
 *
6
 * This Source Code Form is subject to the terms of the Mozilla Public
7
 * License, v. 2.0. If a copy of the MPL was not distributed with this
8
 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
9
 *
10
 * See the COPYRIGHT file distributed with this work for additional
11
 * information regarding copyright ownership.
12
 */
13
14
#include <string.h>
15
16
#include <isc/utf8.h>
17
#include <isc/util.h>
18
19
/*
20
 * UTF-8 is defined in "The Unicode Standard -- Version 4.0"
21
 * Also see RFC 3629.
22
 *
23
 * Char. number range  |        UTF-8 octet sequence
24
 *    (hexadecimal)    |              (binary)
25
 *  --------------------+---------------------------------------------
26
 * 0000 0000-0000 007F | 0xxxxxxx
27
 * 0000 0080-0000 07FF | 110xxxxx 10xxxxxx
28
 * 0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
29
 * 0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
30
 */
31
bool
32
18.4k
isc_utf8_valid(const unsigned char *buf, size_t len) {
33
18.4k
  REQUIRE(buf != NULL);
34
35
296k
  for (size_t i = 0; i < len; i++) {
36
278k
    if (buf[i] <= 0x7f) {
37
271k
      continue;
38
271k
    }
39
6.71k
    if ((i + 1) < len && (buf[i] & 0xe0) == 0xc0 &&
40
3.27k
        (buf[i + 1] & 0xc0) == 0x80)
41
3.23k
    {
42
3.23k
      unsigned int w;
43
3.23k
      w = (buf[i] & 0x1f) << 6;
44
3.23k
      w |= (buf[++i] & 0x3f);
45
3.23k
      if (w < 0x80) {
46
6
        return false;
47
6
      }
48
3.22k
      continue;
49
3.23k
    }
50
3.48k
    if ((i + 2) < len && (buf[i] & 0xf0) == 0xe0 &&
51
1.34k
        (buf[i + 1] & 0xc0) == 0x80 && (buf[i + 2] & 0xc0) == 0x80)
52
1.30k
    {
53
1.30k
      unsigned int w;
54
1.30k
      w = (buf[i] & 0x0f) << 12;
55
1.30k
      w |= (buf[++i] & 0x3f) << 6;
56
1.30k
      w |= (buf[++i] & 0x3f);
57
1.30k
      if (w < 0x0800) {
58
4
        return false;
59
4
      }
60
1.30k
      continue;
61
1.30k
    }
62
2.18k
    if ((i + 3) < len && (buf[i] & 0xf8) == 0xf0 &&
63
2.01k
        (buf[i + 1] & 0xc0) == 0x80 &&
64
2.00k
        (buf[i + 2] & 0xc0) == 0x80 && (buf[i + 3] & 0xc0) == 0x80)
65
1.97k
    {
66
1.97k
      unsigned int w;
67
1.97k
      w = (buf[i] & 0x07) << 18;
68
1.97k
      w |= (buf[++i] & 0x3f) << 12;
69
1.97k
      w |= (buf[++i] & 0x3f) << 6;
70
1.97k
      w |= (buf[++i] & 0x3f);
71
1.97k
      if (w < 0x10000 || w > 0x10FFFF) {
72
30
        return false;
73
30
      }
74
1.94k
      continue;
75
1.97k
    }
76
203
    return false;
77
2.18k
  }
78
18.2k
  return true;
79
18.4k
}
80
81
bool
82
10.6k
isc_utf8_bom(const unsigned char *buf, size_t len) {
83
10.6k
  REQUIRE(buf != NULL);
84
85
10.6k
  if (len >= 3U && !memcmp(buf, "\xef\xbb\xbf", 3)) {
86
1
    return true;
87
1
  }
88
10.6k
  return false;
89
10.6k
}