Coverage Report

Created: 2026-01-10 06:55

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libgit2/src/util/utf8.c
Line
Count
Source
1
/*
2
 * Copyright (C) the libgit2 contributors. All rights reserved.
3
 *
4
 * This file is part of libgit2, distributed under the GNU GPL v2 with
5
 * a Linking Exception. For full terms see the included COPYING file.
6
 */
7
8
#include "utf8.h"
9
10
#include "git2_util.h"
11
12
/*
13
 * git_utf8_iterate is taken from the utf8proc project,
14
 * http://www.public-software-group.org/utf8proc
15
 *
16
 * Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
17
 *
18
 * Permission is hereby granted, free of charge, to any person obtaining a
19
 * copy of this software and associated documentation files (the "Software"),
20
 * to deal in the Software without restriction, including without limitation
21
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
22
 * and/or sell copies of the Software, and to permit persons to whom the
23
 * Software is furnished to do so, subject to the following conditions:
24
 *
25
 * The above copyright notice and this permission notice shall be included in
26
 * all copies or substantial portions of the Software.
27
 *
28
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
29
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
30
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
31
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
32
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
33
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
34
 * DEALINGS IN THE SOFTWARE.
35
 */
36
37
/*
 * Sequence-length class for every possible first byte of a UTF-8 sequence.
 *
 * The entry is the total number of bytes in the sequence introduced by
 * that byte; 0 marks a byte that cannot start a sequence.
 */
static const uint8_t utf8proc_utf8class[256] = {
  /* 0x00 - 0x7F: single-byte sequences */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x10 */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x20 */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x30 */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40 */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x50 */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60 */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x70 */

  /* 0x80 - 0xBF: never valid as a first byte */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80 */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90 */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xA0 */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xB0 */

  /* 0xC0 - 0xDF: two-byte sequences */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0xC0 */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0xD0 */

  /* 0xE0 - 0xEF: three-byte sequences */
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 0xE0 */

  /* 0xF0 - 0xF7: four-byte sequences; 0xF8 - 0xFF: never valid */
  4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0  /* 0xF0 */
};
55
56
static int utf8_charlen(const uint8_t *str, size_t str_len)
57
0
{
58
0
  uint8_t length;
59
0
  size_t i;
60
61
0
  length = utf8proc_utf8class[str[0]];
62
0
  if (!length)
63
0
    return -1;
64
65
0
  if (str_len > 0 && length > str_len)
66
0
    return -1;
67
68
0
  for (i = 1; i < length; i++) {
69
0
    if ((str[i] & 0xC0) != 0x80)
70
0
      return -1;
71
0
  }
72
73
0
  return (int)length;
74
0
}
75
76
/*
 * Decode the single UTF-8 encoded code point at the start of `_str`.
 *
 * `*out` is cleared first and is only overwritten with the decoded code
 * point when decoding succeeds.
 *
 * Rejected inputs (return -1, `*out` left at 0):
 *   - sequences that utf8_charlen() reports as malformed
 *   - overlong encodings (value too small for its sequence length)
 *   - U+D800..U+DFFF and U+FDD0..U+FDEF in three-byte form
 *   - four-byte values at or above 0x110000
 *   - any value whose low 16 bits are 0xFFFE or 0xFFFF
 *
 * Returns the number of bytes consumed on success.
 */
int git_utf8_iterate(uint32_t *out, const char *_str, size_t str_len)
{
  const uint8_t *bytes = (const uint8_t *)_str;
  uint32_t codepoint = 0;
  int nbytes;

  *out = 0;

  nbytes = utf8_charlen(bytes, str_len);
  if (nbytes < 0)
    return -1;

  if (nbytes == 1) {
    codepoint = bytes[0];
  } else if (nbytes == 2) {
    codepoint = ((uint32_t)(bytes[0] & 0x1F) << 6) |
                (uint32_t)(bytes[1] & 0x3F);

    if (codepoint < 0x80)
      return -1;
  } else if (nbytes == 3) {
    codepoint = ((uint32_t)(bytes[0] & 0x0F) << 12) |
                ((uint32_t)(bytes[1] & 0x3F) << 6) |
                (uint32_t)(bytes[2] & 0x3F);

    if (codepoint < 0x800)
      return -1;
    if (codepoint >= 0xD800 && codepoint < 0xE000)
      return -1;
    if (codepoint >= 0xFDD0 && codepoint < 0xFDF0)
      return -1;
  } else if (nbytes == 4) {
    codepoint = ((uint32_t)(bytes[0] & 0x07) << 18) |
                ((uint32_t)(bytes[1] & 0x3F) << 12) |
                ((uint32_t)(bytes[2] & 0x3F) << 6) |
                (uint32_t)(bytes[3] & 0x3F);

    if (codepoint < 0x10000 || codepoint >= 0x110000)
      return -1;
  } else {
    return -1;
  }

  /* Values ending in FFFE / FFFF are refused in every plane. */
  if ((codepoint & 0xFFFF) >= 0xFFFE)
    return -1;

  *out = codepoint;
  return nbytes;
}
116
117
/*
 * Count the characters in the first `str_len` bytes of `_str`.
 *
 * Each sequence accepted by utf8_charlen() counts as one character and
 * advances by its byte length. A byte that does not start an acceptable
 * sequence also counts as one character and advances by a single byte.
 *
 * Returns the number of characters counted; 0 when `str_len` is 0.
 */
size_t git_utf8_char_length(const char *_str, size_t str_len)
{
  const uint8_t *bytes = (const uint8_t *)_str;
  size_t pos, nchars = 0;

  for (pos = 0; pos < str_len; nchars++) {
    int step = utf8_charlen(bytes + pos, str_len - pos);

    /* Malformed input: treat the offending byte as its own character. */
    pos += (step < 0) ? 1 : (size_t)step;
  }

  return nchars;
}
134
135
/*
 * Compute how many leading bytes of `_str` (up to `str_len`) consist
 * solely of sequences that utf8_charlen() accepts.
 *
 * Scanning stops at the first sequence utf8_charlen() rejects, which
 * includes a sequence cut short by the end of the buffer.
 *
 * Returns the byte offset at which scanning stopped; this equals
 * `str_len` when every sequence was accepted.
 */
size_t git_utf8_valid_buf_length(const char *_str, size_t str_len)
{
  const uint8_t *bytes = (const uint8_t *)_str;
  size_t valid = 0;
  int step;

  while (valid < str_len &&
         (step = utf8_charlen(bytes + valid, str_len - valid)) >= 0)
    valid += (size_t)step;

  return valid;
}