Coverage Report

Created: 2025-07-07 10:01

/work/workdir/UnpackedTarball/libexttextcat/src/utf8misc.c
Line
Count
Source (jump to first uncovered line)
1
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
/***************************************************************************
3
 *   Copyright (C) 2006 by Jocelyn Merand                                  *
4
 *   joc.mer@gmail.com                                                     *
5
 *                                                                         *
6
 * THE BSD LICENSE
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 *
12
 * - Redistributions of source code must retain the above copyright
13
 * notice, this list of conditions and the following disclaimer.
14
 *
15
 * - Redistributions in binary form must reproduce the above copyright
16
 * notice, this list of conditions and the following disclaimer in the
17
 * documentation and/or other materials provided with the
18
 * distribution.
19
 *
20
 * - Neither the name of the WiseGuys Internet B.V. nor the names of
21
 * its contributors may be used to endorse or promote products derived
22
 * from this software without specific prior written permission.
23
 *
24
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
28
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
30
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
31
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
32
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
33
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
34
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35
 ***************************************************************************/
36
37
#include "utf8misc.h"
38
/* #include <stdio.h> */
39
40
/**
 * Bit masks used by the character-processing functions in this file to
 * recognise UTF-8 multi-byte sequences.
 *
 * _UTF8_ is forced on just below when it is not already defined, so the
 * UTF-8 values are always the ones selected; the #else values are the
 * single-byte fallback (no byte is ever treated as a sequence lead).
 *
 * NOTE(review): _UTF8_ starts with an underscore followed by an uppercase
 * letter, which is a reserved identifier in C. It is kept as-is here so that
 * anything defining it externally keeps working.
 */
#ifndef _UTF8_
#define _UTF8_
#endif

#ifdef _UTF8_
/* High bit of a byte: set on every byte of a multi-byte sequence. */
#define ESCAPE_MASK 0x80
/* High nibble of a lead byte: its leading 1 bits give the sequence length. */
#define WEIGHT_MASK 0xF0
#else
#define ESCAPE_MASK 0xFF
#define WEIGHT_MASK 0x00
#endif
55
56
const char *utf8_next_char(const char *str)
57
0
{
58
0
    if (*str & ESCAPE_MASK)
59
0
    {
60
        /* 
61
         * if the first bit of the current char is 1 then *str is an escape
62
         * character
63
         */
64
0
        unsigned char escape_char = ((*str & WEIGHT_MASK) << 1);
65
66
        /* 
67
         * and we use it to count (by bit translation) following characters
68
         * (only the weightest part)
69
         */
70
0
        while (escape_char & ESCAPE_MASK && *str)
71
0
        {
72
            /* 
73
             * every step, we move the byte of 1 bit left, when first bit is 0,
74
             * it's finished
75
             */
76
0
            escape_char = escape_char << 1;
77
0
            ++str;
78
0
        }
79
0
    }
80
0
    if (*str)
81
0
    {
82
        /* 
83
         * finally, if we are not on the \0 character, we jump to the next
84
         * character
85
         */
86
0
        ++str;
87
0
    }
88
0
    return str;
89
0
}
90
91
int utf8_charcopy(const char *str, char *dest)
92
0
{
93
94
0
    int pointer = 0;
95
    /* if the first bit of the current char is 1 */
96
0
    if (str[pointer] & ESCAPE_MASK)
97
0
    {
98
        /* 
99
         * then str[pointer] is an escape character and we use it to count
100
         * following characters (only the weightest part)
101
         */
102
0
        unsigned char escape_char = ((str[pointer] & WEIGHT_MASK) << 1);
103
104
        /* 
105
         * every step, we move the byte of 1 bit left, when first bit is 0,
106
         * it's finished
107
         */
108
0
        while (escape_char & ESCAPE_MASK && str[pointer])
109
0
        {
110
0
            dest[pointer] = str[pointer];
111
0
            escape_char = escape_char << 1;
112
0
            ++pointer;
113
0
        }
114
0
    }
115
0
    if (str[pointer])
116
0
    {
117
0
        dest[pointer] = str[pointer];
118
0
        ++pointer;
119
0
    }
120
121
0
    return pointer;
122
0
}
123
124
125
int utf8_issame(char *lex, char *key, int len)
126
0
{
127
    /* printf("[%s] prefix of [%s] with length %i", lex, key, len); */
128
0
    int char_counter = 0;
129
0
    int pointer = 0;
130
0
    while (char_counter < len)
131
0
    {
132
133
        /* if the first bit of the current char is 1 */
134
0
        if (key[pointer] & ESCAPE_MASK)
135
0
        {
136
            /* 
137
             * then key[pointer] is an escape character and we use it to count
138
             * (only the weightest part)
139
             */
140
141
0
            unsigned char escape_char = ((key[pointer] & WEIGHT_MASK) << 1);
142
143
0
            while (escape_char & ESCAPE_MASK && key[pointer] == lex[pointer])
144
0
            {
145
0
                escape_char = escape_char << 1;
146
0
                ++pointer;
147
0
            }
148
0
        }
149
0
        ++char_counter;         /* and we are on a new utf8 character */
150
0
        if (key[pointer] != lex[pointer])
151
0
        {
152
0
            return 0;
153
            /* printf(" NO\n", lex, key, len); */
154
0
        }
155
0
        ++pointer;
156
0
    }
157
0
    if (lex[pointer] != '\0')
158
0
    {
159
0
        return 0;
160
        /* printf(" NO\n"); */
161
0
    }
162
163
    /* printf(" YES\n"); */
164
165
0
    return 1;
166
0
}
167
168
/**
 * Count the UTF-8 characters in str.
 *
 * The string is walked with utf8_next_char(), so a multi-byte sequence
 * counts as one character and the walk stops at the terminating NUL.
 *
 * @param str  NUL-terminated string; must not be NULL.
 * @return number of characters before the terminator (0 for an empty string).
 */
int utf8_strlen(const char *str)
{
    int char_counter = 0;

    while (*str)
    {
        str = utf8_next_char(str);
        ++char_counter;         /* and we are on a new utf8 character */
    }
    return char_counter;
}
178
179
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */