Coverage Report

Created: 2026-02-26 06:45

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/bind9/lib/isc/utf8.c
Line
Count
Source
1
/*
2
 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
3
 *
4
 * SPDX-License-Identifier: MPL-2.0
5
 *
6
 * This Source Code Form is subject to the terms of the Mozilla Public
7
 * License, v. 2.0. If a copy of the MPL was not distributed with this
8
 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
9
 *
10
 * See the COPYRIGHT file distributed with this work for additional
11
 * information regarding copyright ownership.
12
 */
13
14
#include <string.h>
15
16
#include <isc/utf8.h>
17
#include <isc/util.h>
18
19
/*
20
 * UTF-8 is defined in "The Unicode Standard -- Version 4.0"
21
 * Also see RFC 3629.
22
 *
23
 * Char. number range  |        UTF-8 octet sequence
24
 *    (hexadecimal)    |              (binary)
25
 *  --------------------+---------------------------------------------
26
 * 0000 0000-0000 007F | 0xxxxxxx
27
 * 0000 0080-0000 07FF | 110xxxxx 10xxxxxx
28
 * 0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
29
 * 0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
30
 */
31
/*
 * Return true if the 'len' octets starting at 'buf' are a well-formed
 * UTF-8 sequence (RFC 3629), false otherwise.
 *
 * The following are rejected:
 *  - a lead octet that matches none of the 1..4 octet patterns;
 *  - a multi-octet sequence that is truncated by 'len' or whose trailing
 *    octets are not of the form 10xxxxxx;
 *  - overlong encodings (a code point encoded in more octets than needed);
 *  - UTF-16 surrogate code points U+D800..U+DFFF;
 *  - code points above U+10FFFF.
 *
 * An empty buffer ('len' == 0) is valid.
 *
 * Requires:
 *  'buf' is not NULL, even when 'len' is 0.
 */
bool
isc_utf8_valid(const unsigned char *buf, size_t len) {
  REQUIRE(buf != NULL);

  for (size_t i = 0; i < len; i++) {
    /* Single octet: 0xxxxxxx (US-ASCII). */
    if (buf[i] <= 0x7f) {
      continue;
    }

    /* Two octets: 110xxxxx 10xxxxxx. */
    if ((i + 1) < len && (buf[i] & 0xe0) == 0xc0 &&
        (buf[i + 1] & 0xc0) == 0x80)
    {
      unsigned int w;
      w = (buf[i] & 0x1f) << 6;
      w |= (buf[++i] & 0x3f);
      /* Overlong: the value fits in a single octet. */
      if (w < 0x80) {
        return false;
      }
      continue;
    }

    /* Three octets: 1110xxxx 10xxxxxx 10xxxxxx. */
    if ((i + 2) < len && (buf[i] & 0xf0) == 0xe0 &&
        (buf[i + 1] & 0xc0) == 0x80 && (buf[i + 2] & 0xc0) == 0x80)
    {
      unsigned int w;
      w = (buf[i] & 0x0f) << 12;
      w |= (buf[++i] & 0x3f) << 6;
      w |= (buf[++i] & 0x3f);
      /* Overlong: the value fits in two octets. */
      if (w < 0x0800) {
        return false;
      }
      /*
       * U+D800..U+DFFF are reserved for UTF-16 surrogate pairs
       * and must never appear in UTF-8 (RFC 3629, section 3).
       */
      if (w >= 0xd800 && w <= 0xdfff) {
        return false;
      }
      continue;
    }

    /* Four octets: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx. */
    if ((i + 3) < len && (buf[i] & 0xf8) == 0xf0 &&
        (buf[i + 1] & 0xc0) == 0x80 &&
        (buf[i + 2] & 0xc0) == 0x80 && (buf[i + 3] & 0xc0) == 0x80)
    {
      unsigned int w;
      w = (buf[i] & 0x07) << 18;
      w |= (buf[++i] & 0x3f) << 12;
      w |= (buf[++i] & 0x3f) << 6;
      w |= (buf[++i] & 0x3f);
      /* Overlong, or beyond the last Unicode code point. */
      if (w < 0x10000 || w > 0x10FFFF) {
        return false;
      }
      continue;
    }

    /*
     * Stray continuation octet, invalid lead octet, truncated
     * sequence, or malformed trailing octet.
     */
    return false;
  }
  return true;
}
80
81
/*
 * Report whether the buffer begins with the UTF-8 encoding of the
 * byte order mark (the three octets EF BB BF).
 *
 * Returns true only when at least three octets are available and the
 * first three match; shorter buffers always yield false.
 *
 * Requires:
 *  'buf' is not NULL, even when 'len' is 0.
 */
bool
isc_utf8_bom(const unsigned char *buf, size_t len) {
  static const char bom[] = "\xef\xbb\xbf";
  const size_t bomlen = sizeof(bom) - 1; /* exclude the trailing NUL */

  REQUIRE(buf != NULL);

  /* Too short to hold the mark; do not read past the buffer. */
  if (len < bomlen) {
    return false;
  }

  return memcmp(buf, bom, bomlen) == 0;
}