Coverage Report

Created: 2026-05-30 06:23

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/quickjs/libunicode.h
Line
Count
Source
1
/*
2
 * Unicode utilities
3
 *
4
 * Copyright (c) 2017-2018 Fabrice Bellard
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a copy
7
 * of this software and associated documentation files (the "Software"), to deal
8
 * in the Software without restriction, including without limitation the rights
9
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
 * copies of the Software, and to permit persons to whom the Software is
11
 * furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be included in
14
 * all copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
 * THE SOFTWARE.
23
 */
24
#ifndef LIBUNICODE_H
25
#define LIBUNICODE_H
26
27
#include <stdint.h>
28
29
/* unicode standard version */
30
#define LIBUNICODE_UNICODE_VERSION_MAJOR 17
31
#define LIBUNICODE_UNICODE_VERSION_MINOR 0
32
#define LIBUNICODE_UNICODE_VERSION_PATCH 0
33
34
/* define it to include all the unicode tables (40KB larger) */
35
#define CONFIG_ALL_UNICODE
36
37
#define LRE_CC_RES_LEN_MAX 3
38
39
/* char ranges */
40
41
/* Growable list of code points describing a character range set. */
typedef struct {
    /* number of entries used in 'points'; always even */
    int len;
    /* capacity limit compared against 'len' before an entry is stored */
    int size;
    /* the points themselves, sorted by increasing value */
    uint32_t *points;
    /* user pointer kept with the range (presumably handed to
       realloc_func as its 'opaque' argument - confirm in cr_realloc) */
    void *mem_opaque;
    /* allocation callback supplied through cr_init() */
    void *(*realloc_func)(void *opaque, void *ptr, size_t size);
} CharRange;
48
49
/* Set operation selector passed as the 'op' argument of cr_op() and
   cr_op1(). The numeric values are the ones the compiler would assign
   implicitly. */
typedef enum {
    CR_OP_UNION = 0,
    CR_OP_INTER = 1,
    CR_OP_XOR   = 2,
    CR_OP_SUB   = 3,
} CharRangeOpEnum;
55
56
void cr_init(CharRange *cr, void *mem_opaque, void *(*realloc_func)(void *opaque, void *ptr, size_t size));
57
void cr_free(CharRange *cr);
58
int cr_realloc(CharRange *cr, int size);
59
int cr_copy(CharRange *cr, const CharRange *cr1);
60
61
/* Append a single point 'v' to the end of cr->points.
   When the buffer is full (len >= size), cr_realloc() is first asked for
   room for one more entry. Return 0 on success, or -1 without storing
   anything when cr_realloc() returns a non-zero value. */
static inline int cr_add_point(CharRange *cr, uint32_t v)
{
    int is_full = (cr->len >= cr->size);

    if (is_full && cr_realloc(cr, cr->len + 1) != 0)
        return -1;
    /* cr->points is read only after the possible reallocation */
    cr->points[cr->len] = v;
    cr->len++;
    return 0;
}
Unexecuted instantiation: libregexp.c:cr_add_point
libunicode.c:cr_add_point
Line
Count
Source
62
54.0M
{
63
54.0M
    if (cr->len >= cr->size) {
64
976k
        if (cr_realloc(cr, cr->len + 1))
65
0
            return -1;
66
976k
    }
67
54.0M
    cr->points[cr->len++] = v;
68
54.0M
    return 0;
69
54.0M
}
70
71
/* Append the two points 'c1' and 'c2', in that order, to the end of
   cr->points. If fewer than two free entries remain, cr_realloc() is
   asked for len + 2 entries first. Return 0 on success, or -1 without
   storing anything when cr_realloc() returns a non-zero value. */
static inline int cr_add_interval(CharRange *cr, uint32_t c1, uint32_t c2)
{
    uint32_t *dst;

    if (cr->len + 2 > cr->size) {
        if (cr_realloc(cr, cr->len + 2) != 0)
            return -1;
    }
    /* take the destination only after the possible reallocation */
    dst = &cr->points[cr->len];
    dst[0] = c1;
    dst[1] = c2;
    cr->len += 2;
    return 0;
}
libregexp.c:cr_add_interval
Line
Count
Source
72
9.84k
{
73
9.84k
    if ((cr->len + 2) > cr->size) {
74
9.84k
        if (cr_realloc(cr, cr->len + 2))
75
0
            return -1;
76
9.84k
    }
77
9.84k
    cr->points[cr->len++] = c1;
78
9.84k
    cr->points[cr->len++] = c2;
79
9.84k
    return 0;
80
9.84k
}
libunicode.c:cr_add_interval
Line
Count
Source
72
12.8M
{
73
12.8M
    if ((cr->len + 2) > cr->size) {
74
317k
        if (cr_realloc(cr, cr->len + 2))
75
0
            return -1;
76
317k
    }
77
12.8M
    cr->points[cr->len++] = c1;
78
12.8M
    cr->points[cr->len++] = c2;
79
12.8M
    return 0;
80
12.8M
}
81
82
int cr_op(CharRange *cr, const uint32_t *a_pt, int a_len,
83
          const uint32_t *b_pt, int b_len, int op);
84
int cr_op1(CharRange *cr, const uint32_t *b_pt, int b_len, int op);
85
86
/* Merge one interval into 'cr' by calling cr_op1() with CR_OP_UNION on
   the two-point list { c1, c2 + 1 }; the result of cr_op1() is returned
   unchanged. The end point is stored as c2 + 1, computed in uint32_t
   arithmetic, so it wraps to 0 when c2 is UINT32_MAX. */
static inline int cr_union_interval(CharRange *cr, uint32_t c1, uint32_t c2)
{
    const uint32_t interval[2] = { c1, c2 + 1 };

    return cr_op1(cr, interval, 2, CR_OP_UNION);
}
libregexp.c:cr_union_interval
Line
Count
Source
87
40.6k
{
88
40.6k
    uint32_t b_pt[2];
89
40.6k
    b_pt[0] = c1;
90
40.6k
    b_pt[1] = c2 + 1;
91
40.6k
    return cr_op1(cr, b_pt, 2, CR_OP_UNION);
92
40.6k
}
Unexecuted instantiation: libunicode.c:cr_union_interval
93
94
int cr_invert(CharRange *cr);
95
96
int cr_regexp_canonicalize(CharRange *cr, int is_unicode);
97
98
/* Normalization form selector, passed as 'n_type' to unicode_normalize().
   The numeric values are the ones the compiler would assign implicitly. */
typedef enum {
    UNICODE_NFC  = 0,
    UNICODE_NFD  = 1,
    UNICODE_NFKC = 2,
    UNICODE_NFKD = 3,
} UnicodeNormalizationEnum;
104
105
int unicode_normalize(uint32_t **pdst, const uint32_t *src, int src_len,
106
                      UnicodeNormalizationEnum n_type,
107
                      void *opaque, void *(*realloc_func)(void *opaque, void *ptr, size_t size));
108
109
/* Unicode character range functions */
110
111
int unicode_script(CharRange *cr, const char *script_name, int is_ext);
112
int unicode_general_category(CharRange *cr, const char *gc_name);
113
int unicode_prop(CharRange *cr, const char *prop_name);
114
115
typedef void UnicodeSequencePropCB(void *opaque, const uint32_t *buf, int len);
116
int unicode_sequence_prop(const char *prop_name, UnicodeSequencePropCB *cb, void *opaque,
117
                          CharRange *cr);
118
119
int lre_case_conv(uint32_t *res, uint32_t c, int conv_type);
120
int lre_canonicalize(uint32_t c, int is_unicode);
121
122
/* Code point type categories */
123
/* One bit per category; the flags are OR-ed together into masks and
   tested against the entries of lre_ctype_bits[]. */
enum {
    UNICODE_C_SPACE  = 0x01,
    UNICODE_C_DIGIT  = 0x02,
    UNICODE_C_UPPER  = 0x04,
    UNICODE_C_LOWER  = 0x08,
    UNICODE_C_UNDER  = 0x10,
    UNICODE_C_DOLLAR = 0x20,
    UNICODE_C_XDIGIT = 0x40,
};
132
extern uint8_t const lre_ctype_bits[256];
133
134
/* zero or non-zero return value */
135
int lre_is_cased(uint32_t c);
136
int lre_is_case_ignorable(uint32_t c);
137
int lre_is_id_start(uint32_t c);
138
int lre_is_id_continue(uint32_t c);
139
140
0
static inline int lre_is_space_byte(uint8_t c) {
141
0
    return lre_ctype_bits[c] & UNICODE_C_SPACE;
142
0
}
Unexecuted instantiation: libregexp.c:lre_is_space_byte
Unexecuted instantiation: libunicode.c:lre_is_space_byte
143
144
0
static inline int lre_is_id_start_byte(uint8_t c) {
145
0
    return lre_ctype_bits[c] & (UNICODE_C_UPPER | UNICODE_C_LOWER |
146
0
                                UNICODE_C_UNDER | UNICODE_C_DOLLAR);
147
0
}
Unexecuted instantiation: libregexp.c:lre_is_id_start_byte
Unexecuted instantiation: libunicode.c:lre_is_id_start_byte
148
149
0
static inline int lre_is_id_continue_byte(uint8_t c) {
150
0
    return lre_ctype_bits[c] & (UNICODE_C_UPPER | UNICODE_C_LOWER |
151
0
                                UNICODE_C_UNDER | UNICODE_C_DOLLAR |
152
0
                                UNICODE_C_DIGIT);
153
0
}
Unexecuted instantiation: libregexp.c:lre_is_id_continue_byte
Unexecuted instantiation: libunicode.c:lre_is_id_continue_byte
154
155
0
static inline int lre_is_word_byte(uint8_t c) {
156
0
    return lre_ctype_bits[c] & (UNICODE_C_UPPER | UNICODE_C_LOWER |
157
0
                                UNICODE_C_UNDER | UNICODE_C_DIGIT);
158
0
}
Unexecuted instantiation: libregexp.c:lre_is_word_byte
Unexecuted instantiation: libunicode.c:lre_is_word_byte
159
160
int lre_is_space_non_ascii(uint32_t c);
161
162
0
/* Space test for a full code point: values below 256 are answered by the
   byte table through lre_is_space_byte(), all others are forwarded to
   lre_is_space_non_ascii(). Return zero or a non-zero value. */
static inline int lre_is_space(uint32_t c)
{
    return (c < 256) ? lre_is_space_byte((uint8_t)c)
                     : lre_is_space_non_ascii(c);
}
Unexecuted instantiation: libregexp.c:lre_is_space
Unexecuted instantiation: libunicode.c:lre_is_space
168
169
0
/* Tell whether 'c' may begin an identifier. Return zero or a non-zero
   value. Code points below 128 are answered by lre_is_id_start_byte().
   Above that, lre_is_id_start() is used when CONFIG_ALL_UNICODE is
   defined; otherwise every code point that lre_is_space_non_ascii()
   rejects is accepted. */
static inline int lre_js_is_ident_first(uint32_t c)
{
    if (c < 128)
        return lre_is_id_start_byte(c);
#ifdef CONFIG_ALL_UNICODE
    return lre_is_id_start(c);
#else
    return !lre_is_space_non_ascii(c);
#endif
}
Unexecuted instantiation: libregexp.c:lre_js_is_ident_first
Unexecuted instantiation: libunicode.c:lre_js_is_ident_first
180
181
0
/* Tell whether 'c' may appear after the first character of an
   identifier. Return zero or a non-zero value.
   Code points below 128 are answered by lre_is_id_continue_byte().
   U+200C and U+200D are always accepted. For the remaining code points,
   lre_is_id_continue() is used when CONFIG_ALL_UNICODE is defined;
   otherwise every code point that lre_is_space_non_ascii() rejects is
   accepted. */
static inline int lre_js_is_ident_next(uint32_t c)
{
    if (c < 128)
        return lre_is_id_continue_byte(c);
    /* ZWNJ (U+200C) and ZWJ (U+200D) are accepted in identifiers */
    if (c == 0x200C || c == 0x200D) {
        /* plain 1 rather than TRUE: this header does not define TRUE, so
           using it would require every includer to provide it first */
        return 1;
    }
#ifdef CONFIG_ALL_UNICODE
    return lre_is_id_continue(c);
#else
    return !lre_is_space_non_ascii(c);
#endif
}
Unexecuted instantiation: libregexp.c:lre_js_is_ident_next
Unexecuted instantiation: libunicode.c:lre_js_is_ident_next
195
196
#endif /* LIBUNICODE_H */