Coverage Report

Created: 2026-03-12 06:53

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/tmux/utf8-combined.c
Line
Count
Source
1
/* $OpenBSD$ */
2
3
/*
4
 * Copyright (c) 2023 Nicholas Marriott <nicholas.marriott@gmail.com>
5
 *
6
 * Permission to use, copy, modify, and distribute this software for any
7
 * purpose with or without fee is hereby granted, provided that the above
8
 * copyright notice and this permission notice appear in all copies.
9
 *
10
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14
 * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER
15
 * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
16
 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17
 */
18
19
#include <sys/types.h>
20
21
#include <stdlib.h>
22
#include <string.h>
23
#include <wchar.h>
24
25
#include "tmux.h"
26
27
/*
 * Fine-grained classification of a Hangul Jamo character. The comment on each
 * entry gives the code point range it covers.
 */
enum hanguljamo_subclass {
	/* Anything that is not Hangul Jamo. */
	HANGULJAMO_SUBCLASS_NOT_HANGULJAMO,

	/* U+1100 to U+1112 */
	HANGULJAMO_SUBCLASS_CHOSEONG,
	/* U+1113 to U+115E */
	HANGULJAMO_SUBCLASS_OLD_CHOSEONG,
	/* U+115F */
	HANGULJAMO_SUBCLASS_CHOSEONG_FILLER,

	/* U+1160 */
	HANGULJAMO_SUBCLASS_JUNGSEONG_FILLER,
	/* U+1161 to U+1175 */
	HANGULJAMO_SUBCLASS_JUNGSEONG,
	/* U+1176 to U+11A7 */
	HANGULJAMO_SUBCLASS_OLD_JUNGSEONG,

	/* U+11A8 to U+11C2 */
	HANGULJAMO_SUBCLASS_JONGSEONG,
	/* U+11C3 to U+11FF */
	HANGULJAMO_SUBCLASS_OLD_JONGSEONG,

	/* U+A960 to U+A97C */
	HANGULJAMO_SUBCLASS_EXTENDED_OLD_CHOSEONG,
	/* U+D7B0 to U+D7C6 */
	HANGULJAMO_SUBCLASS_EXTENDED_OLD_JUNGSEONG,
	/* U+D7CB to U+D7FB */
	HANGULJAMO_SUBCLASS_EXTENDED_OLD_JONGSEONG
};
41
42
/*
 * Coarse classification of a Hangul Jamo character: which position in a
 * syllable it occupies, regardless of whether it is modern, old, extended or
 * a filler.
 */
enum hanguljamo_class {
	HANGULJAMO_CLASS_NOT_HANGULJAMO,	/* not Hangul Jamo at all */
	HANGULJAMO_CLASS_CHOSEONG,		/* leading consonant */
	HANGULJAMO_CLASS_JUNGSEONG,		/* vowel */
	HANGULJAMO_CLASS_JONGSEONG		/* trailing consonant */
};
48
49
/* Has this got a zero width joiner at the end? */
50
int
51
utf8_has_zwj(const struct utf8_data *ud)
52
7.00k
{
53
7.00k
  if (ud->size < 3)
54
3.20k
    return (0);
55
3.79k
  return (memcmp(ud->data + ud->size - 3, "\342\200\215", 3) == 0);
56
7.00k
}
57
58
/* Is this zero width joiner U+200D? */
59
int
60
utf8_is_zwj(const struct utf8_data *ud)
61
23.9k
{
62
23.9k
  if (ud->size != 3)
63
17.1k
    return (0);
64
6.87k
  return (memcmp(ud->data, "\342\200\215", 3) == 0);
65
23.9k
}
66
67
/* Is this variation selector U+FE0F? */
68
int
69
utf8_is_vs(const struct utf8_data *ud)
70
23.9k
{
71
23.9k
  if (ud->size != 3)
72
17.1k
    return (0);
73
6.87k
  return (memcmp(ud->data, "\357\270\217", 3) == 0);
74
23.9k
}
75
76
/* Is this Hangul filler U+3164? */
77
int
78
utf8_is_hangul_filler(const struct utf8_data *ud)
79
23.9k
{
80
23.9k
  if (ud->size != 3)
81
17.1k
    return (0);
82
6.87k
  return (memcmp(ud->data, "\343\205\244", 3) == 0);
83
23.9k
}
84
85
/* Should these two characters combine? */
86
int
87
utf8_should_combine(const struct utf8_data *with, const struct utf8_data *add)
88
14.0k
{
89
14.0k
  wchar_t w, a;
90
91
14.0k
  if (utf8_towc(with, &w) != UTF8_DONE)
92
9.23k
    return (0);
93
4.76k
  if (utf8_towc(add, &a) != UTF8_DONE)
94
2.21k
    return (0);
95
96
  /* Regional indicators. */
97
2.54k
  if ((a >= 0x1F1E6 && a <= 0x1F1FF) && (w >= 0x1F1E6 && w <= 0x1F1FF))
98
0
    return (1);
99
100
  /* Emoji skin tone modifiers. */
101
2.54k
  switch (a) {
102
0
  case 0x1F44B:
103
0
  case 0x1F44C:
104
0
  case 0x1F44D:
105
0
  case 0x1F44E:
106
0
  case 0x1F44F:
107
0
  case 0x1F450:
108
0
  case 0x1F466:
109
0
  case 0x1F467:
110
0
  case 0x1F468:
111
0
  case 0x1F469:
112
0
  case 0x1F46E:
113
0
  case 0x1F470:
114
0
  case 0x1F471:
115
0
  case 0x1F472:
116
0
  case 0x1F473:
117
0
  case 0x1F474:
118
0
  case 0x1F475:
119
0
  case 0x1F476:
120
0
  case 0x1F477:
121
0
  case 0x1F478:
122
0
  case 0x1F47C:
123
0
  case 0x1F481:
124
0
  case 0x1F482:
125
0
  case 0x1F483:
126
0
  case 0x1F485:
127
0
  case 0x1F486:
128
0
  case 0x1F487:
129
0
  case 0x1F4AA:
130
0
  case 0x1F575:
131
0
  case 0x1F57A:
132
0
  case 0x1F590:
133
0
  case 0x1F595:
134
0
  case 0x1F596:
135
0
  case 0x1F645:
136
0
  case 0x1F646:
137
0
  case 0x1F647:
138
0
  case 0x1F64B:
139
0
  case 0x1F64C:
140
0
  case 0x1F64D:
141
0
  case 0x1F64E:
142
0
  case 0x1F64F:
143
0
  case 0x1F6B4:
144
0
  case 0x1F6B5:
145
0
  case 0x1F6B6:
146
0
  case 0x1F926:
147
0
  case 0x1F937:
148
0
  case 0x1F938:
149
0
  case 0x1F939:
150
0
  case 0x1F93D:
151
0
  case 0x1F93E:
152
0
  case 0x1F9B5:
153
0
  case 0x1F9B6:
154
0
  case 0x1F9B8:
155
0
  case 0x1F9B9:
156
0
  case 0x1F9CD:
157
0
  case 0x1F9CE:
158
0
  case 0x1F9CF:
159
0
  case 0x1F9D1:
160
0
  case 0x1F9D2:
161
0
  case 0x1F9D3:
162
0
  case 0x1F9D4:
163
0
  case 0x1F9D5:
164
0
  case 0x1F9D6:
165
0
  case 0x1F9D7:
166
0
  case 0x1F9D8:
167
0
  case 0x1F9D9:
168
0
  case 0x1F9DA:
169
0
  case 0x1F9DB:
170
0
  case 0x1F9DC:
171
0
  case 0x1F9DD:
172
0
  case 0x1F9DE:
173
0
  case 0x1F9DF:
174
0
    if (w >= 0x1F3FB && w <= 0x1F3FF)
175
0
      return (1);
176
0
    break;
177
2.54k
  }
178
2.54k
  return 0;
179
2.54k
}
180
181
static enum hanguljamo_subclass
182
hanguljamo_get_subclass(const u_char *s)
183
5.44k
{
184
5.44k
  switch (s[0]) {
185
0
  case 0xE1:
186
0
    switch (s[1]) {
187
0
    case 0x84:
188
0
      if (s[2] >= 0x80 && s[2] <= 0x92)
189
0
        return (HANGULJAMO_SUBCLASS_CHOSEONG);
190
0
      if (s[2] >= 0x93 && s[2] <= 0xBF)
191
0
        return (HANGULJAMO_SUBCLASS_OLD_CHOSEONG);
192
0
      break;
193
0
    case 0x85:
194
0
      if (s[2] == 0x9F)
195
0
        return (HANGULJAMO_SUBCLASS_CHOSEONG_FILLER);
196
0
      if (s[2] == 0xA0)
197
0
        return (HANGULJAMO_SUBCLASS_JUNGSEONG_FILLER);
198
0
      if (s[2] >= 0x80 && s[2] <= 0x9E)
199
0
        return (HANGULJAMO_SUBCLASS_OLD_CHOSEONG);
200
0
      if (s[2] >= 0xA1 && s[2] <= 0xB5)
201
0
        return (HANGULJAMO_SUBCLASS_JUNGSEONG);
202
0
      if (s[2] >= 0xB6 && s[2] <= 0xBF)
203
0
        return (HANGULJAMO_SUBCLASS_OLD_JUNGSEONG);
204
0
      break;
205
0
    case 0x86:
206
0
      if (s[2] >= 0x80 && s[2] <= 0xA7)
207
0
        return (HANGULJAMO_SUBCLASS_OLD_JUNGSEONG);
208
0
      if (s[2] >= 0xA8 && s[2] <= 0xBF)
209
0
        return (HANGULJAMO_SUBCLASS_JONGSEONG);
210
0
      break;
211
0
    case 0x87:
212
0
      if (s[2] >= 0x80 && s[2] <= 0x82)
213
0
        return (HANGULJAMO_SUBCLASS_JONGSEONG);
214
0
      if (s[2] >= 0x83 && s[2] <= 0xBF)
215
0
        return (HANGULJAMO_SUBCLASS_OLD_JONGSEONG);
216
0
      break;
217
0
    }
218
0
    break;
219
0
  case 0xEA:
220
0
    if (s[1] == 0xA5 && s[2] >= 0xA0 && s[2] <= 0xBC)
221
0
      return (HANGULJAMO_SUBCLASS_EXTENDED_OLD_CHOSEONG);
222
0
    break;
223
0
  case 0xED:
224
0
    if (s[1] == 0x9E && s[2] >= 0xB0 && s[2] <= 0xBF)
225
0
      return (HANGULJAMO_SUBCLASS_EXTENDED_OLD_JUNGSEONG);
226
0
    if (s[1] != 0x9F)
227
0
      break;
228
0
    if (s[2] >= 0x80 && s[2] <= 0x86)
229
0
      return (HANGULJAMO_SUBCLASS_EXTENDED_OLD_JUNGSEONG);
230
0
    if (s[2] >= 0x8B && s[2] <= 0xBB)
231
0
      return (HANGULJAMO_SUBCLASS_EXTENDED_OLD_JONGSEONG);
232
0
    break;
233
5.44k
  }
234
5.44k
  return (HANGULJAMO_SUBCLASS_NOT_HANGULJAMO);
235
5.44k
}
236
237
static enum hanguljamo_class
238
hanguljamo_get_class(const u_char *s)
239
5.44k
{
240
5.44k
  switch (hanguljamo_get_subclass(s)) {
241
0
  case HANGULJAMO_SUBCLASS_CHOSEONG:
242
0
  case HANGULJAMO_SUBCLASS_CHOSEONG_FILLER:
243
0
  case HANGULJAMO_SUBCLASS_OLD_CHOSEONG:
244
0
  case HANGULJAMO_SUBCLASS_EXTENDED_OLD_CHOSEONG:
245
0
    return (HANGULJAMO_CLASS_CHOSEONG);
246
0
  case HANGULJAMO_SUBCLASS_JUNGSEONG:
247
0
  case HANGULJAMO_SUBCLASS_JUNGSEONG_FILLER:
248
0
  case HANGULJAMO_SUBCLASS_OLD_JUNGSEONG:
249
0
  case HANGULJAMO_SUBCLASS_EXTENDED_OLD_JUNGSEONG:
250
0
    return (HANGULJAMO_CLASS_JUNGSEONG);
251
0
  case HANGULJAMO_SUBCLASS_JONGSEONG:
252
0
  case HANGULJAMO_SUBCLASS_OLD_JONGSEONG:
253
0
  case HANGULJAMO_SUBCLASS_EXTENDED_OLD_JONGSEONG:
254
0
    return (HANGULJAMO_CLASS_JONGSEONG);
255
5.44k
  case HANGULJAMO_SUBCLASS_NOT_HANGULJAMO:
256
5.44k
    return (HANGULJAMO_CLASS_NOT_HANGULJAMO);
257
5.44k
  }
258
0
  return (HANGULJAMO_CLASS_NOT_HANGULJAMO);
259
5.44k
}
260
261
enum hanguljamo_state
262
hanguljamo_check_state(const struct utf8_data *p_ud, const struct utf8_data *ud)
263
7.00k
{
264
7.00k
  const u_char  *s;
265
266
7.00k
  if (ud->size != 3)
267
1.55k
    return (HANGULJAMO_STATE_NOT_HANGULJAMO);
268
269
5.44k
  switch (hanguljamo_get_class(ud->data)) {
270
0
  case HANGULJAMO_CLASS_CHOSEONG:
271
0
    return (HANGULJAMO_STATE_CHOSEONG);
272
0
  case HANGULJAMO_CLASS_JUNGSEONG:
273
0
    if (p_ud->size < 3)
274
0
      return (HANGULJAMO_STATE_NOT_COMPOSABLE);
275
0
    s = p_ud->data + p_ud->size - 3;
276
0
    if (hanguljamo_get_class(s) == HANGULJAMO_CLASS_CHOSEONG)
277
0
      return (HANGULJAMO_STATE_COMPOSABLE);
278
0
    return (HANGULJAMO_STATE_NOT_COMPOSABLE);
279
0
  case HANGULJAMO_CLASS_JONGSEONG:
280
0
    if (p_ud->size < 3)
281
0
      return (HANGULJAMO_STATE_NOT_COMPOSABLE);
282
0
    s = p_ud->data + p_ud->size - 3;
283
0
    if (hanguljamo_get_class(s) == HANGULJAMO_CLASS_JUNGSEONG)
284
0
      return (HANGULJAMO_STATE_COMPOSABLE);
285
0
    return (HANGULJAMO_STATE_NOT_COMPOSABLE);
286
5.44k
  case HANGULJAMO_CLASS_NOT_HANGULJAMO:
287
5.44k
    return (HANGULJAMO_STATE_NOT_HANGULJAMO);
288
5.44k
  }
289
0
  return (HANGULJAMO_STATE_NOT_HANGULJAMO);
290
5.44k
}