/work/workdir/UnpackedTarball/libexttextcat/src/utf8misc.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | | /*************************************************************************** |
3 | | * Copyright (C) 2006 by Jocelyn Merand * |
4 | | * joc.mer@gmail.com * |
5 | | * * |
6 | | * THE BSD LICENSE |
7 | | * |
8 | | * Redistribution and use in source and binary forms, with or without |
9 | | * modification, are permitted provided that the following conditions |
10 | | * are met: |
11 | | * |
12 | | * - Redistributions of source code must retain the above copyright |
13 | | * notice, this list of conditions and the following disclaimer. |
14 | | * |
15 | | * - Redistributions in binary form must reproduce the above copyright |
16 | | * notice, this list of conditions and the following disclaimer in the |
17 | | * documentation and/or other materials provided with the |
18 | | * distribution. |
19 | | * |
20 | | * - Neither the name of the WiseGuys Internet B.V. nor the names of |
21 | | * its contributors may be used to endorse or promote products derived |
22 | | * from this software without specific prior written permission. |
23 | | * |
24 | | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
25 | | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
26 | | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
27 | | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
28 | | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
29 | | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
30 | | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
31 | | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
32 | | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
33 | | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
34 | | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
35 | | ***************************************************************************/ |
36 | | |
37 | | #include "utf8misc.h" |
38 | | /* #include <stdio.h> */ |
39 | | |
/**
 * Bit masks used by the character-processing functions in this file to
 * recognise and measure multi-byte UTF-8 sequences.
 *
 * _UTF8_ is defined right here unless the build already supplied it, so the
 * UTF-8 values are always the ones selected; the alternative pair is kept for
 * reference only.
 */
#if !defined(_UTF8_)
#define _UTF8_
#endif

#if defined(_UTF8_)
/* Top bit of a byte: set on every byte of a multi-byte sequence. */
#define ESCAPE_MASK 0x80
/* Upper nibble of a lead byte; shifted left bit by bit to count the
 * bytes that follow it. */
#define WEIGHT_MASK 0xF0
#else
/* Single-byte fallback: any non-NUL byte passes the escape test and the
 * weight is always zero, so no extra bytes are ever skipped. */
#define ESCAPE_MASK 0xFF
#define WEIGHT_MASK 0x00
#endif
55 | | |
56 | | const char *utf8_next_char(const char *str) |
57 | 0 | { |
58 | 0 | if (*str & ESCAPE_MASK) |
59 | 0 | { |
60 | | /* |
61 | | * if the first bit of the current char is 1 then *str is an escape |
62 | | * character |
63 | | */ |
64 | 0 | unsigned char escape_char = ((*str & WEIGHT_MASK) << 1); |
65 | | |
66 | | /* |
67 | | * and we use it to count (by bit translation) following characters |
68 | | * (only the weightest part) |
69 | | */ |
70 | 0 | while (escape_char & ESCAPE_MASK && *str) |
71 | 0 | { |
72 | | /* |
73 | | * every step, we move the byte of 1 bit left, when first bit is 0, |
74 | | * it's finished |
75 | | */ |
76 | 0 | escape_char = escape_char << 1; |
77 | 0 | ++str; |
78 | 0 | } |
79 | 0 | } |
80 | 0 | if (*str) |
81 | 0 | { |
82 | | /* |
83 | | * finally, if we are not on the \0 character, we jump to the next |
84 | | * character |
85 | | */ |
86 | 0 | ++str; |
87 | 0 | } |
88 | 0 | return str; |
89 | 0 | } |
90 | | |
91 | | int utf8_charcopy(const char *str, char *dest) |
92 | 0 | { |
93 | |
|
94 | 0 | int pointer = 0; |
95 | | /* if the first bit of the current char is 1 */ |
96 | 0 | if (str[pointer] & ESCAPE_MASK) |
97 | 0 | { |
98 | | /* |
99 | | * then str[pointer] is an escape character and we use it to count |
100 | | * following characters (only the weightest part) |
101 | | */ |
102 | 0 | unsigned char escape_char = ((str[pointer] & WEIGHT_MASK) << 1); |
103 | | |
104 | | /* |
105 | | * every step, we move the byte of 1 bit left, when first bit is 0, |
106 | | * it's finished |
107 | | */ |
108 | 0 | while (escape_char & ESCAPE_MASK && str[pointer]) |
109 | 0 | { |
110 | 0 | dest[pointer] = str[pointer]; |
111 | 0 | escape_char = escape_char << 1; |
112 | 0 | ++pointer; |
113 | 0 | } |
114 | 0 | } |
115 | 0 | if (str[pointer]) |
116 | 0 | { |
117 | 0 | dest[pointer] = str[pointer]; |
118 | 0 | ++pointer; |
119 | 0 | } |
120 | |
|
121 | 0 | return pointer; |
122 | 0 | } |
123 | | |
124 | | |
125 | | int utf8_issame(char *lex, char *key, int len) |
126 | 0 | { |
127 | | /* printf("[%s] prefix of [%s] with length %i", lex, key, len); */ |
128 | 0 | int char_counter = 0; |
129 | 0 | int pointer = 0; |
130 | 0 | while (char_counter < len) |
131 | 0 | { |
132 | | |
133 | | /* if the first bit of the current char is 1 */ |
134 | 0 | if (key[pointer] & ESCAPE_MASK) |
135 | 0 | { |
136 | | /* |
137 | | * then key[pointer] is an escape character and we use it to count |
138 | | * (only the weightest part) |
139 | | */ |
140 | |
|
141 | 0 | unsigned char escape_char = ((key[pointer] & WEIGHT_MASK) << 1); |
142 | |
|
143 | 0 | while (escape_char & ESCAPE_MASK && key[pointer] == lex[pointer]) |
144 | 0 | { |
145 | 0 | escape_char = escape_char << 1; |
146 | 0 | ++pointer; |
147 | 0 | } |
148 | 0 | } |
149 | 0 | ++char_counter; /* and we are on a new utf8 character */ |
150 | 0 | if (key[pointer] != lex[pointer]) |
151 | 0 | { |
152 | 0 | return 0; |
153 | | /* printf(" NO\n", lex, key, len); */ |
154 | 0 | } |
155 | 0 | ++pointer; |
156 | 0 | } |
157 | 0 | if (lex[pointer] != '\0') |
158 | 0 | { |
159 | 0 | return 0; |
160 | | /* printf(" NO\n"); */ |
161 | 0 | } |
162 | | |
163 | | /* printf(" YES\n"); */ |
164 | | |
165 | 0 | return 1; |
166 | 0 | } |
167 | | |
/**
 * Count the characters (not bytes) in a NUL-terminated string.
 *
 * @param str  NUL-terminated string.
 * @return     Number of steps utf8_next_char() needs to reach the terminator.
 */
extern int utf8_strlen(const char *str)
{
    int total = 0;
    const char *cursor;

    /* Each hop of utf8_next_char() accounts for exactly one character. */
    for (cursor = str; *cursor != '\0'; cursor = utf8_next_char(cursor))
    {
        ++total;
    }

    return total;
}
178 | | |
179 | | /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |