Coverage Report

Created: 2026-01-24 06:24

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/bind9/lib/isc/utf8.c
Line
Count
Source
1
/*
2
 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
3
 *
4
 * SPDX-License-Identifier: MPL-2.0
5
 *
6
 * This Source Code Form is subject to the terms of the Mozilla Public
7
 * License, v. 2.0. If a copy of the MPL was not distributed with this
8
 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
9
 *
10
 * See the COPYRIGHT file distributed with this work for additional
11
 * information regarding copyright ownership.
12
 */
13
14
#include <string.h>
15
16
#include <isc/utf8.h>
17
#include <isc/util.h>
18
19
/*
20
 * UTF-8 is defined in "The Unicode Standard -- Version 4.0"
21
 * Also see RFC 3629.
22
 *
23
 * Char. number range  |        UTF-8 octet sequence
24
 *    (hexadecimal)    |              (binary)
25
 *  --------------------+---------------------------------------------
26
 * 0000 0000-0000 007F | 0xxxxxxx
27
 * 0000 0080-0000 07FF | 110xxxxx 10xxxxxx
28
 * 0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
29
 * 0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
30
 */
31
bool
32
16.1k
isc_utf8_valid(const unsigned char *buf, size_t len) {
33
16.1k
  REQUIRE(buf != NULL);
34
35
317k
  for (size_t i = 0; i < len; i++) {
36
302k
    if (buf[i] <= 0x7f) {
37
294k
      continue;
38
294k
    }
39
7.20k
    if ((i + 1) < len && (buf[i] & 0xe0) == 0xc0 &&
40
3.08k
        (buf[i + 1] & 0xc0) == 0x80)
41
3.05k
    {
42
3.05k
      unsigned int w;
43
3.05k
      w = (buf[i] & 0x1f) << 6;
44
3.05k
      w |= (buf[++i] & 0x3f);
45
3.05k
      if (w < 0x80) {
46
8
        return false;
47
8
      }
48
3.04k
      continue;
49
3.05k
    }
50
4.15k
    if ((i + 2) < len && (buf[i] & 0xf0) == 0xe0 &&
51
1.46k
        (buf[i + 1] & 0xc0) == 0x80 && (buf[i + 2] & 0xc0) == 0x80)
52
1.43k
    {
53
1.43k
      unsigned int w;
54
1.43k
      w = (buf[i] & 0x0f) << 12;
55
1.43k
      w |= (buf[++i] & 0x3f) << 6;
56
1.43k
      w |= (buf[++i] & 0x3f);
57
1.43k
      if (w < 0x0800) {
58
4
        return false;
59
4
      }
60
1.43k
      continue;
61
1.43k
    }
62
2.71k
    if ((i + 3) < len && (buf[i] & 0xf8) == 0xf0 &&
63
2.56k
        (buf[i + 1] & 0xc0) == 0x80 &&
64
2.55k
        (buf[i + 2] & 0xc0) == 0x80 && (buf[i + 3] & 0xc0) == 0x80)
65
2.53k
    {
66
2.53k
      unsigned int w;
67
2.53k
      w = (buf[i] & 0x07) << 18;
68
2.53k
      w |= (buf[++i] & 0x3f) << 12;
69
2.53k
      w |= (buf[++i] & 0x3f) << 6;
70
2.53k
      w |= (buf[++i] & 0x3f);
71
2.53k
      if (w < 0x10000 || w > 0x10FFFF) {
72
28
        return false;
73
28
      }
74
2.50k
      continue;
75
2.53k
    }
76
183
    return false;
77
2.71k
  }
78
15.8k
  return true;
79
16.1k
}
80
81
bool
82
8.84k
isc_utf8_bom(const unsigned char *buf, size_t len) {
83
8.84k
  REQUIRE(buf != NULL);
84
85
8.84k
  if (len >= 3U && !memcmp(buf, "\xef\xbb\xbf", 3)) {
86
1
    return true;
87
1
  }
88
8.84k
  return false;
89
8.84k
}