Coverage Report

Created: 2025-07-18 06:55

/src/quickjs/libunicode.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Unicode utilities
3
 *
4
 * Copyright (c) 2017-2018 Fabrice Bellard
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a copy
7
 * of this software and associated documentation files (the "Software"), to deal
8
 * in the Software without restriction, including without limitation the rights
9
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
 * copies of the Software, and to permit persons to whom the Software is
11
 * furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be included in
14
 * all copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
 * THE SOFTWARE.
23
 */
24
#ifndef LIBUNICODE_H
25
#define LIBUNICODE_H
26
27
#include <stdint.h>
28
29
/* define it to include all the unicode tables (40KB larger) */
30
#define CONFIG_ALL_UNICODE
31
32
#define LRE_CC_RES_LEN_MAX 3
33
34
/* char ranges */
35
36
/* Growable array of code points ("points") together with the allocator
 * used to resize it.
 *
 * NOTE(review): how consecutive points pair up into intervals is not
 * defined in this header. cr_union_interval() passes {c1, c2 + 1} to
 * cr_op1(), which suggests half-open [start, end) pairs -- confirm
 * against the implementation.
 * NOTE(review): size_t is used by realloc_func, but this header only
 * includes <stdint.h>; presumably <stddef.h> (or an equivalent) is
 * pulled in by an earlier include -- confirm.
 */
typedef struct {
37
    int len; /* in points, always even */
38
    int size; /* compared against len before appending; presumably the allocated capacity of points[] -- confirm */
39
    uint32_t *points; /* points sorted by increasing value */
40
    void *mem_opaque; /* presumably forwarded to realloc_func as its `opaque` argument -- confirm */
41
    void *(*realloc_func)(void *opaque, void *ptr, size_t size); /* caller-supplied allocator callback */
42
} CharRange;
43
44
/* Operation selector for the `op` argument of cr_op() and cr_op1();
 * cr_union_interval() in this header passes CR_OP_UNION.
 * NOTE(review): the exact semantics of each value live in the
 * implementation, which is not visible here. The names suggest set
 * union, intersection, symmetric difference and difference.
 */
typedef enum {
45
    CR_OP_UNION,
46
    CR_OP_INTER,
47
    CR_OP_XOR,
48
    CR_OP_SUB,
49
} CharRangeOpEnum;
50
51
/* CharRange lifecycle helpers. Only the prototypes are visible in this
 * header; the definitions live elsewhere.
 *
 * cr_init() receives the same mem_opaque / realloc_func pair that the
 * CharRange struct stores (presumably it records them in `cr`).
 */
void cr_init(CharRange *cr, void *mem_opaque, void *(*realloc_func)(void *opaque, void *ptr, size_t size));
52
void cr_free(CharRange *cr);
53
/* The inline helpers in this header treat a non-zero return value from
 * cr_realloc() as failure. */
int cr_realloc(CharRange *cr, int size);
54
/* NOTE(review): presumably copies cr1 into cr; return convention not
 * shown here. */
int cr_copy(CharRange *cr, const CharRange *cr1);
55
56
/* Append the single point `v` at the end of cr->points.
 *
 * When len has reached size, cr_realloc(cr, len + 1) is called first;
 * if it returns non-zero, this function returns -1 and `v` is not
 * stored. Returns 0 once the point has been written and len has been
 * incremented.
 */
static inline int cr_add_point(CharRange *cr, uint32_t v)
57
20
{
58
20
    if (cr->len >= cr->size) {
59
9
        if (cr_realloc(cr, cr->len + 1))
60
0
            return -1;
61
9
    }
62
20
    cr->points[cr->len++] = v;
63
20
    return 0;
64
20
}
Unexecuted instantiation: quickjs.c:cr_add_point
libregexp.c:cr_add_point
Line
Count
Source
57
20
{
58
20
    if (cr->len >= cr->size) {
59
9
        if (cr_realloc(cr, cr->len + 1))
60
0
            return -1;
61
9
    }
62
20
    cr->points[cr->len++] = v;
63
20
    return 0;
64
20
}
Unexecuted instantiation: libunicode.c:cr_add_point
65
66
/* Append the two points `c1` then `c2` at the end of cr->points.
 *
 * If fewer than two free slots remain (len + 2 > size),
 * cr_realloc(cr, len + 2) is called first; a non-zero result makes this
 * function return -1 with nothing stored. Returns 0 on success.
 *
 * The values are stored as given: no ordering check and no adjustment
 * of c2 is performed here (contrast with cr_union_interval(), which
 * passes c2 + 1).
 */
static inline int cr_add_interval(CharRange *cr, uint32_t c1, uint32_t c2)
67
0
{
68
0
    if ((cr->len + 2) > cr->size) {
69
0
        if (cr_realloc(cr, cr->len + 2))
70
0
            return -1;
71
0
    }
72
0
    cr->points[cr->len++] = c1;
73
0
    cr->points[cr->len++] = c2;
74
0
    return 0;
75
0
}
Unexecuted instantiation: quickjs.c:cr_add_interval
Unexecuted instantiation: libregexp.c:cr_add_interval
Unexecuted instantiation: libunicode.c:cr_add_interval
76
77
/* Range-combining primitives; prototypes only, defined elsewhere.
 *
 * cr_op() takes two point arrays (a_pt/a_len and b_pt/b_len) plus an
 * operation code; cr_op1() takes a single point array. The `op`
 * argument is an int; cr_union_interval() passes a CharRangeOpEnum
 * value (CR_OP_UNION) for it.
 * NOTE(review): presumably cr_op() writes the result into `cr` and
 * cr_op1() combines `cr` with b_pt in place -- confirm against the
 * implementation.
 */
int cr_op(CharRange *cr, const uint32_t *a_pt, int a_len,
78
          const uint32_t *b_pt, int b_len, int op);
79
int cr_op1(CharRange *cr, const uint32_t *b_pt, int b_len, int op);
80
81
/* Merge the interval described by c1 and c2 into `cr`.
 *
 * Builds the two-point array {c1, c2 + 1} and hands it to cr_op1() with
 * CR_OP_UNION; the return value of cr_op1() is returned unchanged.
 * The "+ 1" suggests that c2 is an inclusive last code point converted
 * to an exclusive end -- confirm against cr_op1().
 *
 * Note: c2 + 1 is computed in uint32_t, so c2 == UINT32_MAX wraps to 0.
 */
static inline int cr_union_interval(CharRange *cr, uint32_t c1, uint32_t c2)
82
0
{
83
0
    uint32_t b_pt[2];
84
0
    b_pt[0] = c1;
85
0
    b_pt[1] = c2 + 1;
86
0
    return cr_op1(cr, b_pt, 2, CR_OP_UNION);
87
0
}
Unexecuted instantiation: quickjs.c:cr_union_interval
Unexecuted instantiation: libregexp.c:cr_union_interval
Unexecuted instantiation: libunicode.c:cr_union_interval
88
89
/* Prototype only; defined elsewhere.
 * NOTE(review): presumably replaces `cr` with its complement; return
 * convention not shown in this header. */
int cr_invert(CharRange *cr);
90
91
/* Prototype only; defined elsewhere.
 * NOTE(review): presumably rewrites `cr` for case-insensitive regexp
 * matching, with is_unicode selecting the folding rules -- confirm. */
int cr_regexp_canonicalize(CharRange *cr, int is_unicode);
92
93
/* Normalization form selector; this is the type of the n_type
 * parameter of unicode_normalize(). The enumerators are named after the
 * four Unicode normalization forms (NFC, NFD, NFKC, NFKD). */
typedef enum {
94
    UNICODE_NFC,
95
    UNICODE_NFD,
96
    UNICODE_NFKC,
97
    UNICODE_NFKD,
98
} UnicodeNormalizationEnum;
99
100
/* Prototype only; defined elsewhere.
 *
 * Takes src_len code points at `src`, the requested normalization form
 * `n_type`, and an allocator callback (realloc_func, invoked with
 * `opaque`) of the same shape as the one stored in CharRange.
 * NOTE(review): presumably the result buffer is allocated through
 * realloc_func and returned via *pdst, with the int return carrying the
 * output length or a negative error -- confirm, including who frees
 * *pdst.
 */
int unicode_normalize(uint32_t **pdst, const uint32_t *src, int src_len,
101
                      UnicodeNormalizationEnum n_type,
102
                      void *opaque, void *(*realloc_func)(void *opaque, void *ptr, size_t size));
103
104
/* Unicode character range functions */
105
106
/* Prototypes only; defined elsewhere. Each takes a CharRange and a
 * NUL-terminated name.
 * NOTE(review): presumably each fills `cr` with the code points matching
 * the named script / general category / binary property, and is_ext
 * selects Script_Extensions instead of Script -- confirm, along with the
 * return codes. */
int unicode_script(CharRange *cr, const char *script_name, int is_ext);
107
int unicode_general_category(CharRange *cr, const char *gc_name);
108
int unicode_prop(CharRange *cr, const char *prop_name);
109
110
/* Callback type used by unicode_sequence_prop(): receives a user
 * pointer plus a buffer of `len` code points. */
typedef void UnicodeSequencePropCB(void *opaque, const uint32_t *buf, int len);
111
/* Prototype only; defined elsewhere.
 * NOTE(review): presumably invokes `cb` (with `opaque`) once per sequence
 * belonging to the named property; the role of `cr` is not visible from
 * this header -- confirm. */
int unicode_sequence_prop(const char *prop_name, UnicodeSequencePropCB *cb, void *opaque,
112
                          CharRange *cr);
113
114
/* Prototype only; defined elsewhere.
 * NOTE(review): presumably writes the case-converted form of `c` into
 * `res` and returns its length; LRE_CC_RES_LEN_MAX (3) defined above
 * looks like the required capacity of `res` -- confirm, along with the
 * meaning of conv_type. */
int lre_case_conv(uint32_t *res, uint32_t c, int conv_type);
115
/* Prototype only; defined elsewhere.
 * NOTE(review): presumably returns the canonical (case-folded) form of
 * `c` for regexp matching -- confirm. */
int lre_canonicalize(uint32_t c, int is_unicode);
116
117
/* Code point type categories */
118
/* Single-bit flags describing a byte-sized character. The *_byte inline
 * helpers below test them by masking entries of lre_ctype_bits[].
 * UNICODE_C_XDIGIT is defined here but not referenced by any helper in
 * this header. */
enum {
119
    UNICODE_C_SPACE  = (1 << 0),
120
    UNICODE_C_DIGIT  = (1 << 1),
121
    UNICODE_C_UPPER  = (1 << 2),
122
    UNICODE_C_LOWER  = (1 << 3),
123
    UNICODE_C_UNDER  = (1 << 4),
124
    UNICODE_C_DOLLAR = (1 << 5),
125
    UNICODE_C_XDIGIT = (1 << 6),
126
};
127
/* 256-entry flag table, defined elsewhere; indexed by a uint8_t
 * character value in the helpers below. */
extern uint8_t const lre_ctype_bits[256];
128
129
/* zero or non-zero return value */
130
/* Code point predicates; prototypes only, defined elsewhere. Per the
 * comment above, callers should test the result for zero / non-zero
 * rather than compare it with 1.
 * lre_is_id_start() and lre_is_id_continue() are used by
 * lre_js_is_ident_first() / lre_js_is_ident_next() for c >= 128 when
 * CONFIG_ALL_UNICODE is defined. */
int lre_is_cased(uint32_t c);
131
int lre_is_case_ignorable(uint32_t c);
132
int lre_is_id_start(uint32_t c);
133
int lre_is_id_continue(uint32_t c);
134
135
144
/* Table lookup for a byte-sized character: returns
 * lre_ctype_bits[c] masked with UNICODE_C_SPACE, i.e. a non-zero value
 * (the flag bit itself, not necessarily 1) when the flag is set and 0
 * otherwise. */
static inline int lre_is_space_byte(uint8_t c) {
136
144
    return lre_ctype_bits[c] & UNICODE_C_SPACE;
137
144
}
quickjs.c:lre_is_space_byte
Line
Count
Source
135
144
static inline int lre_is_space_byte(uint8_t c) {
136
144
    return lre_ctype_bits[c] & UNICODE_C_SPACE;
137
144
}
Unexecuted instantiation: libregexp.c:lre_is_space_byte
Unexecuted instantiation: libunicode.c:lre_is_space_byte
138
139
145
/* Table lookup for a byte-sized character: returns non-zero when
 * lre_ctype_bits[c] has any of the UPPER, LOWER, UNDER or DOLLAR flags
 * set, 0 otherwise. The result is the masked flag bits, not a
 * normalized 0/1. */
static inline int lre_is_id_start_byte(uint8_t c) {
140
145
    return lre_ctype_bits[c] & (UNICODE_C_UPPER | UNICODE_C_LOWER |
141
145
                                UNICODE_C_UNDER | UNICODE_C_DOLLAR);
142
145
}
quickjs.c:lre_is_id_start_byte
Line
Count
Source
139
145
static inline int lre_is_id_start_byte(uint8_t c) {
140
145
    return lre_ctype_bits[c] & (UNICODE_C_UPPER | UNICODE_C_LOWER |
141
145
                                UNICODE_C_UNDER | UNICODE_C_DOLLAR);
142
145
}
Unexecuted instantiation: libregexp.c:lre_is_id_start_byte
Unexecuted instantiation: libunicode.c:lre_is_id_start_byte
143
144
3.14M
/* Table lookup for a byte-sized character: same flag set as
 * lre_is_id_start_byte() plus UNICODE_C_DIGIT. Returns the masked flag
 * bits (non-zero when any of them is set), 0 otherwise. */
static inline int lre_is_id_continue_byte(uint8_t c) {
145
3.14M
    return lre_ctype_bits[c] & (UNICODE_C_UPPER | UNICODE_C_LOWER |
146
3.14M
                                UNICODE_C_UNDER | UNICODE_C_DOLLAR |
147
3.14M
                                UNICODE_C_DIGIT);
148
3.14M
}
quickjs.c:lre_is_id_continue_byte
Line
Count
Source
144
3.14M
static inline int lre_is_id_continue_byte(uint8_t c) {
145
3.14M
    return lre_ctype_bits[c] & (UNICODE_C_UPPER | UNICODE_C_LOWER |
146
3.14M
                                UNICODE_C_UNDER | UNICODE_C_DOLLAR |
147
3.14M
                                UNICODE_C_DIGIT);
148
3.14M
}
Unexecuted instantiation: libregexp.c:lre_is_id_continue_byte
Unexecuted instantiation: libunicode.c:lre_is_id_continue_byte
149
150
/* Prototype only; defined elsewhere. In this header it is called by
 * lre_is_space() for c >= 256, and (negated) by the lre_js_is_ident_*
 * helpers for c >= 128 when CONFIG_ALL_UNICODE is not defined. */
int lre_is_space_non_ascii(uint32_t c);
151
152
144
/* Whitespace test for a full code point.
 *
 * Values below 256 are answered from the lre_ctype_bits[] table via
 * lre_is_space_byte(); everything else is delegated to
 * lre_is_space_non_ascii(). The result is zero / non-zero, not
 * necessarily 0 / 1. */
static inline int lre_is_space(uint32_t c) {
153
144
    if (c < 256)
154
144
        return lre_is_space_byte(c); /* c < 256, so the narrowing to uint8_t loses nothing */
155
0
    else
156
0
        return lre_is_space_non_ascii(c);
157
144
}
quickjs.c:lre_is_space
Line
Count
Source
152
144
static inline int lre_is_space(uint32_t c) {
153
144
    if (c < 256)
154
144
        return lre_is_space_byte(c);
155
0
    else
156
0
        return lre_is_space_non_ascii(c);
157
144
}
Unexecuted instantiation: libregexp.c:lre_is_space
Unexecuted instantiation: libunicode.c:lre_is_space
158
159
145
/* Tests whether code point `c` is accepted as the first character of an
 * identifier. Returns zero / non-zero.
 *
 * - c < 128: table lookup through lre_is_id_start_byte().
 * - otherwise, with CONFIG_ALL_UNICODE defined (it is, at the top of
 *   this header): delegated to lre_is_id_start().
 * - otherwise, without CONFIG_ALL_UNICODE: every code point for which
 *   lre_is_space_non_ascii() returns 0 is accepted.
 */
static inline int lre_js_is_ident_first(uint32_t c) {
160
145
    if (c < 128) {
161
145
        return lre_is_id_start_byte(c);
162
145
    } else {
163
0
#ifdef CONFIG_ALL_UNICODE
164
0
        return lre_is_id_start(c);
165
#else
166
        return !lre_is_space_non_ascii(c);
167
#endif
168
0
    }
169
145
}
quickjs.c:lre_js_is_ident_first
Line
Count
Source
159
145
static inline int lre_js_is_ident_first(uint32_t c) {
160
145
    if (c < 128) {
161
145
        return lre_is_id_start_byte(c);
162
145
    } else {
163
0
#ifdef CONFIG_ALL_UNICODE
164
0
        return lre_is_id_start(c);
165
#else
166
        return !lre_is_space_non_ascii(c);
167
#endif
168
0
    }
169
145
}
Unexecuted instantiation: libregexp.c:lre_js_is_ident_first
Unexecuted instantiation: libunicode.c:lre_js_is_ident_first
170
171
3.14M
/* Tests whether code point `c` is accepted after the first character of
 * an identifier. Returns zero / non-zero.
 *
 * - c < 128: table lookup through lre_is_id_continue_byte().
 * - c is 0x200C or 0x200D (ZWNJ / ZWJ): always accepted.
 * - otherwise, with CONFIG_ALL_UNICODE defined (it is, at the top of
 *   this header): delegated to lre_is_id_continue().
 * - otherwise, without CONFIG_ALL_UNICODE: every code point for which
 *   lre_is_space_non_ascii() returns 0 is accepted.
 *
 * NOTE(review): TRUE is not defined anywhere in this header; presumably
 * it comes from a header that every includer pulls in first -- confirm,
 * or this header is not self-contained.
 */
static inline int lre_js_is_ident_next(uint32_t c) {
172
3.14M
    if (c < 128) {
173
3.14M
        return lre_is_id_continue_byte(c);
174
3.14M
    } else {
175
        /* ZWNJ and ZWJ are accepted in identifiers */
176
1
        if (c >= 0x200C && c <= 0x200D)
177
0
            return TRUE;
178
1
#ifdef CONFIG_ALL_UNICODE
179
1
        return lre_is_id_continue(c);
180
#else
181
        return !lre_is_space_non_ascii(c);
182
#endif
183
1
    }
184
3.14M
}
quickjs.c:lre_js_is_ident_next
Line
Count
Source
171
3.14M
static inline int lre_js_is_ident_next(uint32_t c) {
172
3.14M
    if (c < 128) {
173
3.14M
        return lre_is_id_continue_byte(c);
174
3.14M
    } else {
175
        /* ZWNJ and ZWJ are accepted in identifiers */
176
1
        if (c >= 0x200C && c <= 0x200D)
177
0
            return TRUE;
178
1
#ifdef CONFIG_ALL_UNICODE
179
1
        return lre_is_id_continue(c);
180
#else
181
        return !lre_is_space_non_ascii(c);
182
#endif
183
1
    }
184
3.14M
}
Unexecuted instantiation: libregexp.c:lre_js_is_ident_next
Unexecuted instantiation: libunicode.c:lre_js_is_ident_next
185
186
#endif /* LIBUNICODE_H */