/src/systemd/src/basic/gunicode.c
Line | Count | Source |
1 | | /* SPDX-License-Identifier: LGPL-2.1-or-later */ |
2 | | /* gunicode.c - Unicode manipulation functions |
3 | | * |
4 | | * Copyright (C) 1999, 2000 Tom Tromey |
5 | | * Copyright © 2000, 2005 Red Hat, Inc. |
6 | | */ |
7 | | |
8 | | #include <stdlib.h> |
9 | | |
10 | | #include "gunicode.h" |
11 | | |
/* File-local shorthand for the code-point type; expands to uint32_t. */
#define unichar uint32_t
13 | | |
/**
 * utf8_prev_char:
 * @p: a pointer to a position within a UTF-8 encoded string
 *
 * Steps backwards from @p, one byte at a time, until it reaches a byte
 * that is not a UTF-8 continuation byte (bit pattern 10xxxxxx), and
 * returns the address of that byte.
 *
 * @p may point into the middle of a multi-byte sequence. The byte found
 * is not validated beyond not being a continuation byte. No lower bound
 * is checked: the first byte examined is the one at @p - 1, so the caller
 * must guarantee that a non-continuation byte exists before @p.
 *
 * Return value: a pointer to the start of the previous character.
 **/
char *
utf8_prev_char (const char *p)
{
  const char *cursor = p;

  /* Always move back at least once, then keep going over 10xxxxxx bytes. */
  do
    cursor--;
  while ((*cursor & 0xc0) == 0x80);

  return (char *) cursor;
}
37 | | |
38 | | struct Interval |
39 | | { |
40 | | unichar start, end; |
41 | | }; |
42 | | |
43 | | static int |
44 | | interval_compare (const void *key, const void *elt) |
45 | 136M | { |
46 | 136M | unichar c = (unichar) (long) (key); |
47 | 136M | struct Interval *interval = (struct Interval *)elt; |
48 | | |
49 | 136M | if (c < interval->start) |
50 | 136M | return -1; |
51 | 33.0k | if (c > interval->end) |
52 | 30.0k | return +1; |
53 | | |
54 | 3.03k | return 0; |
55 | 33.0k | } |
56 | | |
57 | | /* |
58 | | * NOTE: |
59 | | * |
60 | | * The tables for g_unichar_iswide() and g_unichar_iswide_cjk() are |
61 | | * generated from the Unicode Character Database's file |
62 | | * extracted/DerivedEastAsianWidth.txt using the gen-iswide-table.py |
63 | | * in this way: |
64 | | * |
65 | | * ./gen-iswide-table.py < path/to/ucd/extracted/DerivedEastAsianWidth.txt | fmt |
66 | | * |
67 | | * Last update for Unicode 6.0. |
68 | | */ |
69 | | |
70 | | /** |
71 | | * g_unichar_iswide: |
72 | | * @c: a Unicode character |
73 | | * |
74 | | * Determines if a character is typically rendered in a double-width |
75 | | * cell. |
76 | | * |
77 | | * Return value: %TRUE if the character is wide |
78 | | **/ |
79 | | bool |
80 | | unichar_iswide (unichar c) |
81 | 22.7M | { |
82 | | /* See NOTE earlier for how to update this table. */ |
83 | 22.7M | static const struct Interval wide[] = { |
84 | 22.7M | {0x1100, 0x115F}, {0x2329, 0x232A}, {0x2E80, 0x2E99}, {0x2E9B, 0x2EF3}, |
85 | 22.7M | {0x2F00, 0x2FD5}, {0x2FF0, 0x2FFB}, {0x3000, 0x303E}, {0x3041, 0x3096}, |
86 | 22.7M | {0x3099, 0x30FF}, {0x3105, 0x312D}, {0x3131, 0x318E}, {0x3190, 0x31BA}, |
87 | 22.7M | {0x31C0, 0x31E3}, {0x31F0, 0x321E}, {0x3220, 0x3247}, {0x3250, 0x32FE}, |
88 | 22.7M | {0x3300, 0x4DBF}, {0x4E00, 0xA48C}, {0xA490, 0xA4C6}, {0xA960, 0xA97C}, |
89 | 22.7M | {0xAC00, 0xD7A3}, {0xF900, 0xFAFF}, {0xFE10, 0xFE19}, {0xFE30, 0xFE52}, |
90 | 22.7M | {0xFE54, 0xFE66}, {0xFE68, 0xFE6B}, {0xFF01, 0xFF60}, {0xFFE0, 0xFFE6}, |
91 | 22.7M | {0x1B000, 0x1B001}, {0x1F200, 0x1F202}, {0x1F210, 0x1F23A}, |
92 | 22.7M | {0x1F240, 0x1F248}, {0x1F250, 0x1F251}, |
93 | 22.7M | {0x1F300, 0x1F567}, /* Miscellaneous Symbols and Pictographs */ |
94 | 22.7M | {0x20000, 0x2FFFD}, {0x30000, 0x3FFFD}, |
95 | 22.7M | }; |
96 | | |
97 | 22.7M | if (bsearch ((void *)(uintptr_t)c, wide, ELEMENTSOF(wide), sizeof wide[0], |
98 | 22.7M | interval_compare)) |
99 | 3.03k | return true; |
100 | | |
101 | 22.7M | return false; |
102 | 22.7M | } |
103 | | |
/*
 * 256-entry table indexed by byte value:
 *
 *   0x00-0xBF -> 1    0xC0-0xDF -> 2    0xE0-0xEF -> 3
 *   0xF0-0xF7 -> 4    0xF8-0xFB -> 5    0xFC-0xFD -> 6
 *   0xFE-0xFF -> 1
 *
 * NOTE(review): presumably the length in bytes of the UTF-8 sequence that
 * starts with the given byte, used to step to the next character; confirm
 * against the users of this table (not visible in this file).
 */
const char utf8_skip_data[256] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
};