Coverage Report

Created: 2026-01-10 07:04

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/gnupg/regexp/utf8.c
Line
Count
Source
1
/**
2
 * UTF-8 utility functions
3
 *
4
 * (c) 2010-2016 Steve Bennett <steveb@workware.net.au>
5
 *
6
 * See LICENCE for licence details.
7
 */
8
9
#include <ctype.h>
10
#include <stdlib.h>
11
#include <string.h>
12
#include <stdio.h>
13
#include <assert.h>
14
#include "utf8.h"
15
16
/* This one is always implemented */
17
/**
 * Encode the code point 'uc' as UTF-8 into the buffer at 'p'.
 *
 * Between 1 and 4 bytes are stored; no NUL terminator is appended.
 * Values that need more than 21 bits are silently truncated to their
 * low 21 bits (0x1fffff).
 *
 * Returns the number of bytes written.
 */
int utf8_fromunicode(char *p, unsigned uc)
{
    /* 7 bits: plain ASCII */
    if (uc < 0x80) {
        p[0] = uc;
        return 1;
    }

    /* 11 bits: 110xxxxx 10xxxxxx */
    if (uc < 0x800) {
        p[0] = 0xc0 | (uc >> 6);
        p[1] = 0x80 | (uc & 0x3f);
        return 2;
    }

    /* 16 bits: 1110xxxx 10xxxxxx 10xxxxxx */
    if (uc < 0x10000) {
        p[0] = 0xe0 | (uc >> 12);
        p[1] = 0x80 | ((uc >> 6) & 0x3f);
        p[2] = 0x80 | (uc & 0x3f);
        return 3;
    }

    /* 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx (higher bits dropped) */
    p[0] = 0xf0 | ((uc >> 18) & 0x07);
    p[1] = 0x80 | ((uc >> 12) & 0x3f);
    p[2] = 0x80 | ((uc >> 6) & 0x3f);
    p[3] = 0x80 | (uc & 0x3f);
    return 4;
}
43
44
#if defined(USE_UTF8) && !defined(JIM_BOOTSTRAP)
45
/**
 * Return the length in bytes (1-4) of the UTF-8 sequence introduced by
 * the lead byte 'c'. Only the low 8 bits of 'c' are examined.
 *
 * A byte that is not a valid lead byte (a continuation byte, or
 * 0xf8 and above) is reported as a single-byte sequence.
 */
int utf8_charlen(int c)
{
    /* The top five bits are enough to classify every lead byte */
    const int top = c & 0xf8;

    if (top == 0xf0) {
        return 4;               /* 11110xxx */
    }
    if ((top & 0xf0) == 0xe0) {
        return 3;               /* 1110xxxx */
    }
    if ((top & 0xe0) == 0xc0) {
        return 2;               /* 110xxxxx */
    }
    /* ASCII, or an invalid lead byte treated as a single byte */
    return 1;
}
62
63
/**
 * Return the byte offset within 'str' of the character at character
 * position 'index'.
 *
 * The string is stepped over one lead byte at a time using
 * utf8_charlen(). The loop does not look for a terminating NUL and does
 * not guard against a negative 'index', so the caller must pass an
 * index that lies within the string.
 */
int utf8_index(const char *str, int index)
{
    const char *cursor;

    for (cursor = str; index != 0; index--) {
        cursor += utf8_charlen(*cursor);
    }
    return cursor - str;
}
71
72
/* Non-zero if 'b' has the form 10xxxxxx, i.e. is a UTF-8 continuation byte */
static int utf8_is_trail_byte(int b)
{
    return (b & 0xc0) == 0x80;
}

/**
 * Decode the UTF-8 sequence at the start of 'str' and store the
 * resulting code point in '*uc'.
 *
 * Returns the number of bytes consumed (1-4).
 *
 * A sequence that is malformed (bad or missing continuation bytes, an
 * invalid lead byte) or overlong is not decoded: the first byte is
 * stored in '*uc' unchanged and 1 is returned. Continuation bytes are
 * inspected strictly left to right, so nothing is read beyond a NUL
 * that cuts a sequence short. No check is made for surrogates or for
 * values above 0x10ffff.
 */
int utf8_tounicode(const char *str, int *uc)
{
    const unsigned char *b = (const unsigned char *)str;
    const int lead = b[0];
    int cp;

    if (lead >= 0xc0) {
        if (lead < 0xe0) {
            /* 110xxxxx 10xxxxxx */
            if (utf8_is_trail_byte(b[1])) {
                cp = ((lead & 0x3f) << 6) | (b[1] & 0x7f);
                /* Anything below 0x80 here is an overlong encoding */
                if (cp >= 0x80) {
                    *uc = cp;
                    return 2;
                }
            }
        }
        else if (lead < 0xf0) {
            /* 1110xxxx 10xxxxxx 10xxxxxx */
            if (utf8_is_trail_byte(b[1]) && utf8_is_trail_byte(b[2])) {
                cp = ((lead & 0x1f) << 12) | ((b[1] & 0x7f) << 6) | (b[2] & 0x7f);
                /* Anything below 0x800 here is an overlong encoding */
                if (cp >= 0x800) {
                    *uc = cp;
                    return 3;
                }
            }
        }
        else if (lead < 0xf8) {
            /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
            if (utf8_is_trail_byte(b[1]) && utf8_is_trail_byte(b[2]) && utf8_is_trail_byte(b[3])) {
                cp = ((lead & 0x0f) << 18) | ((b[1] & 0x7f) << 12) | ((b[2] & 0x7f) << 6) | (b[3] & 0x7f);
                /* Anything below 0x10000 here is an overlong encoding */
                if (cp >= 0x10000) {
                    *uc = cp;
                    return 4;
                }
            }
        }
    }

    /* ASCII, a stray continuation byte, or an invalid sequence: pass the byte through */
    *uc = lead;
    return 1;
}
112
113
/*
 * One entry of a case-mapping table: a code point paired with its
 * counterpart in the other case. Both members are unsigned short, so
 * only code points that fit in that type can be represented.
 */
struct casemap {
    unsigned short code;        /* code point used as the lookup key */
    unsigned short altcode;     /* the same character in the alternate case */
};
117
118
119
/* Generated mapping tables */
120
#include "_unicode_mapping.c"
121
122
0
/*
 * Number of elements in the array A. A must be a true array, not a
 * pointer. The whole expansion is parenthesized so that it behaves as a
 * single operand inside a larger expression such as `n / ARRAYSIZE(a)`.
 */
#define ARRAYSIZE(A) (sizeof(A) / sizeof(*(A)))
123
124
static int cmp_casemap(const void *key, const void *cm)
125
0
{
126
0
    return *(int *)key - (int)((const struct casemap *)cm)->code;
127
0
}
128
129
static int utf8_map_case(const struct casemap *mapping, int num, int ch)
130
0
{
131
    /* We only support 16 bit case mapping */
132
0
    if (ch <= 0xffff) {
133
0
        const struct casemap *cm =
134
0
            bsearch(&ch, mapping, num, sizeof(*mapping), cmp_casemap);
135
136
0
        if (cm) {
137
0
            return cm->altcode;
138
0
        }
139
0
    }
140
0
    return ch;
141
0
}
142
143
int utf8_upper(int ch)
144
0
{
145
0
    if (isascii(ch)) {
146
0
        return toupper(ch);
147
0
    }
148
0
    return utf8_map_case(unicode_case_mapping_upper, ARRAYSIZE(unicode_case_mapping_upper), ch);
149
0
}
150
#endif /* USE_UTF8 && !JIM_BOOTSTRAP */