Coverage Report

Created: 2025-06-24 06:40

/src/systemd/src/basic/gunicode.c
Line
Count
Source
1
/* SPDX-License-Identifier: LGPL-2.1-or-later */
2
/* gunicode.c - Unicode manipulation functions
3
 *
4
 *  Copyright (C) 1999, 2000 Tom Tromey
5
 *  Copyright © 2000, 2005 Red Hat, Inc.
6
 */
7
8
#include <stdlib.h>
9
10
#include "gunicode.h"
11
12
136M
/* Code point type used by the definitions below; a macro alias for uint32_t. */
#define unichar uint32_t
13
14
/**
 * utf8_prev_char:
 * @p: a pointer to a position within a UTF-8 encoded string
 *
 * Steps backwards from @p, one byte at a time, until it reaches a byte
 * that is not a UTF-8 continuation byte (bit pattern 10xxxxxx), and
 * returns the address of that byte.
 *
 * @p does not have to be at the beginning of a UTF-8 character. No check
 * is made to see if the character found is actually valid other than
 * it starts with an appropriate byte.
 *
 * The scan has no lower bound: the caller must guarantee that a
 * non-continuation byte exists before @p inside the same buffer. In
 * particular, @p must not point at the first byte of the string.
 *
 * Return value: a pointer to the found character. The const qualifier
 * of @p is dropped in the returned pointer.
 **/
char *
utf8_prev_char (const char *p)
{
  for (;;)
    {
      p--;
      /* Inspect the byte as unsigned so the mask does not depend on
       * whether plain char is signed. Anything that is not 10xxxxxx
       * is either ASCII or the lead byte of a multi-byte sequence. */
      if ((*(const unsigned char *)p & 0xc0) != 0x80)
        return (char *)p;
    }
}
37
38
struct Interval
39
{
40
  unichar start, end;
41
};
42
43
static int
44
interval_compare (const void *key, const void *elt)
45
136M
{
46
136M
  unichar c = (unichar) (long) (key);
47
136M
  struct Interval *interval = (struct Interval *)elt;
48
49
136M
  if (c < interval->start)
50
136M
    return -1;
51
33.0k
  if (c > interval->end)
52
30.0k
    return +1;
53
54
3.03k
  return 0;
55
33.0k
}
56
57
/*
58
 * NOTE:
59
 *
60
 * The tables for g_unichar_iswide() and g_unichar_iswide_cjk() are
61
 * generated from the Unicode Character Database's file
62
 * extracted/DerivedEastAsianWidth.txt using the gen-iswide-table.py
63
 * in this way:
64
 *
65
 *   ./gen-iswide-table.py < path/to/ucd/extracted/DerivedEastAsianWidth.txt | fmt
66
 *
67
 * Last update for Unicode 6.0.
68
 */
69
70
/**
71
 * g_unichar_iswide:
72
 * @c: a Unicode character
73
 *
74
 * Determines if a character is typically rendered in a double-width
75
 * cell.
76
 *
77
 * Return value: %TRUE if the character is wide
78
 **/
79
bool
80
unichar_iswide (unichar c)
81
22.7M
{
82
  /* See NOTE earlier for how to update this table. */
83
22.7M
  static const struct Interval wide[] = {
84
22.7M
    {0x1100, 0x115F}, {0x2329, 0x232A}, {0x2E80, 0x2E99}, {0x2E9B, 0x2EF3},
85
22.7M
    {0x2F00, 0x2FD5}, {0x2FF0, 0x2FFB}, {0x3000, 0x303E}, {0x3041, 0x3096},
86
22.7M
    {0x3099, 0x30FF}, {0x3105, 0x312D}, {0x3131, 0x318E}, {0x3190, 0x31BA},
87
22.7M
    {0x31C0, 0x31E3}, {0x31F0, 0x321E}, {0x3220, 0x3247}, {0x3250, 0x32FE},
88
22.7M
    {0x3300, 0x4DBF}, {0x4E00, 0xA48C}, {0xA490, 0xA4C6}, {0xA960, 0xA97C},
89
22.7M
    {0xAC00, 0xD7A3}, {0xF900, 0xFAFF}, {0xFE10, 0xFE19}, {0xFE30, 0xFE52},
90
22.7M
    {0xFE54, 0xFE66}, {0xFE68, 0xFE6B}, {0xFF01, 0xFF60}, {0xFFE0, 0xFFE6},
91
22.7M
    {0x1B000, 0x1B001}, {0x1F200, 0x1F202}, {0x1F210, 0x1F23A},
92
22.7M
    {0x1F240, 0x1F248}, {0x1F250, 0x1F251},
93
22.7M
    {0x1F300, 0x1F567}, /* Miscellaneous Symbols and Pictographs */
94
22.7M
    {0x20000, 0x2FFFD}, {0x30000, 0x3FFFD},
95
22.7M
  };
96
97
22.7M
  if (bsearch ((void *)(uintptr_t)c, wide, ELEMENTSOF(wide), sizeof wide[0],
98
22.7M
               interval_compare))
99
3.03k
    return true;
100
101
22.7M
  return false;
102
22.7M
}
103
104
/*
 * 256-entry table indexed by byte value:
 *
 *   0x00-0xBF -> 1
 *   0xC0-0xDF -> 2
 *   0xE0-0xEF -> 3
 *   0xF0-0xF7 -> 4
 *   0xF8-0xFB -> 5
 *   0xFC-0xFD -> 6
 *   0xFE-0xFF -> 1
 *
 * NOTE(review): going by the name, this is presumably the number of bytes
 * to skip to reach the next character when indexed by the first byte of a
 * UTF-8 sequence; the consumer is not visible in this file - confirm
 * against the header.
 */
const char utf8_skip_data[256] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
};