/src/xpdf-4.05/xpdf/UTF8.cc
Line | Count | Source (jump to first uncovered line) |
1 | | //======================================================================== |
2 | | // |
3 | | // UTF8.cc |
4 | | // |
5 | | // Copyright 2001-2017 Glyph & Cog, LLC |
6 | | // |
7 | | //======================================================================== |
8 | | |
9 | | #include <aconf.h> |
10 | | #include "UTF8.h" |
11 | | |
12 | 7.76k | int mapUTF8(Unicode u, char *buf, int bufSize) { |
13 | 7.76k | if (u <= 0x0000007f) { |
14 | 6.43k | if (bufSize < 1) { |
15 | 0 | return 0; |
16 | 0 | } |
17 | 6.43k | buf[0] = (char)u; |
18 | 6.43k | return 1; |
19 | 6.43k | } else if (u <= 0x000007ff) { |
20 | 923 | if (bufSize < 2) { |
21 | 0 | return 0; |
22 | 0 | } |
23 | 923 | buf[0] = (char)(0xc0 + (u >> 6)); |
24 | 923 | buf[1] = (char)(0x80 + (u & 0x3f)); |
25 | 923 | return 2; |
26 | 923 | } else if (u <= 0x0000ffff) { |
27 | 355 | if (bufSize < 3) { |
28 | 0 | return 0; |
29 | 0 | } |
30 | 355 | buf[0] = (char)(0xe0 + (u >> 12)); |
31 | 355 | buf[1] = (char)(0x80 + ((u >> 6) & 0x3f)); |
32 | 355 | buf[2] = (char)(0x80 + (u & 0x3f)); |
33 | 355 | return 3; |
34 | 355 | } else if (u <= 0x0010ffff) { |
35 | 14 | if (bufSize < 4) { |
36 | 0 | return 0; |
37 | 0 | } |
38 | 14 | buf[0] = (char)(0xf0 + (u >> 18)); |
39 | 14 | buf[1] = (char)(0x80 + ((u >> 12) & 0x3f)); |
40 | 14 | buf[2] = (char)(0x80 + ((u >> 6) & 0x3f)); |
41 | 14 | buf[3] = (char)(0x80 + (u & 0x3f)); |
42 | 14 | return 4; |
43 | 31 | } else { |
44 | 31 | return 0; |
45 | 31 | } |
46 | 7.76k | } |
47 | | |
48 | 0 | int mapUCS2(Unicode u, char *buf, int bufSize) { |
49 | 0 | if (u <= 0xffff) { |
50 | 0 | if (bufSize < 2) { |
51 | 0 | return 0; |
52 | 0 | } |
53 | 0 | buf[0] = (char)((u >> 8) & 0xff); |
54 | 0 | buf[1] = (char)(u & 0xff); |
55 | 0 | return 2; |
56 | 0 | } else { |
57 | 0 | return 0; |
58 | 0 | } |
59 | 0 | } |
60 | | |
61 | 0 | GBool getUTF8(GString *s, int *i, Unicode *u) { |
62 | 0 | Guchar c0, c1, c2, c3, c4, c5; |
63 | |
|
64 | 0 | if (*i >= s->getLength()) { |
65 | 0 | return gFalse; |
66 | 0 | } |
67 | 0 | c0 = (Guchar)s->getChar((*i)++); |
68 | 0 | if (c0 < 0x80) { |
69 | 0 | *u = (Unicode)c0; |
70 | 0 | } else if (c0 < 0xe0) { |
71 | 0 | if (*i < s->getLength() && |
72 | 0 | ((c1 = (Guchar)s->getChar(*i)) & 0xc0) == 0x80) { |
73 | 0 | *i += 1; |
74 | 0 | *u = (Unicode)(((c0 & 0x1f) << 6) | |
75 | 0 | (c1 & 0x3f)); |
76 | 0 | } else { |
77 | 0 | *u = (Unicode)c0; |
78 | 0 | } |
79 | 0 | } else if (c0 < 0xf0) { |
80 | 0 | if (*i < s->getLength() - 1 && |
81 | 0 | ((c1 = (Guchar)s->getChar(*i)) & 0xc0) == 0x80 && |
82 | 0 | ((c2 = (Guchar)s->getChar(*i + 1)) & 0xc0) == 0x80) { |
83 | 0 | *i += 2; |
84 | 0 | *u = (Unicode)(((c0 & 0x0f) << 12) | |
85 | 0 | ((c1 & 0x3f) << 6) | |
86 | 0 | (c2 & 0x3f)); |
87 | 0 | } else { |
88 | 0 | *u = (Unicode)c0; |
89 | 0 | } |
90 | 0 | } else if (c0 < 0xf8) { |
91 | 0 | if (*i < s->getLength() - 2 && |
92 | 0 | ((c1 = (Guchar)s->getChar(*i)) & 0xc0) == 0x80 && |
93 | 0 | ((c2 = (Guchar)s->getChar(*i + 1)) & 0xc0) == 0x80 && |
94 | 0 | ((c3 = (Guchar)s->getChar(*i + 2)) & 0xc0) == 0x80) { |
95 | 0 | *i += 3; |
96 | 0 | *u = (Unicode)(((c0 & 0x07) << 18) | |
97 | 0 | ((c1 & 0x3f) << 12) | |
98 | 0 | ((c2 & 0x3f) << 6) | |
99 | 0 | (c3 & 0x3f)); |
100 | 0 | } else { |
101 | 0 | *u = (Unicode)c0; |
102 | 0 | } |
103 | 0 | } else if (c0 < 0xfc) { |
104 | 0 | if (*i < s->getLength() - 3 && |
105 | 0 | ((c1 = (Guchar)s->getChar(*i)) & 0xc0) == 0x80 && |
106 | 0 | ((c2 = (Guchar)s->getChar(*i + 1)) & 0xc0) == 0x80 && |
107 | 0 | ((c3 = (Guchar)s->getChar(*i + 2)) & 0xc0) == 0x80 && |
108 | 0 | ((c4 = (Guchar)s->getChar(*i + 3)) & 0xc0) == 0x80) { |
109 | 0 | *i += 4; |
110 | 0 | *u = (Unicode)(((c0 & 0x03) << 24) | |
111 | 0 | ((c1 & 0x3f) << 18) | |
112 | 0 | ((c2 & 0x3f) << 12) | |
113 | 0 | ((c3 & 0x3f) << 6) | |
114 | 0 | (c4 & 0x3f)); |
115 | 0 | } else { |
116 | 0 | *u = (Unicode)c0; |
117 | 0 | } |
118 | 0 | } else if (c0 < 0xfe) { |
119 | 0 | if (*i < s->getLength() - 4 && |
120 | 0 | ((c1 = (Guchar)s->getChar(*i)) & 0xc0) == 0x80 && |
121 | 0 | ((c2 = (Guchar)s->getChar(*i + 1)) & 0xc0) == 0x80 && |
122 | 0 | ((c3 = (Guchar)s->getChar(*i + 2)) & 0xc0) == 0x80 && |
123 | 0 | ((c4 = (Guchar)s->getChar(*i + 3)) & 0xc0) == 0x80 && |
124 | 0 | ((c5 = (Guchar)s->getChar(*i + 4)) & 0xc0) == 0x80) { |
125 | 0 | *i += 5; |
126 | 0 | *u = (Unicode)(((c0 & 0x01) << 30) | |
127 | 0 | ((c1 & 0x3f) << 24) | |
128 | 0 | ((c2 & 0x3f) << 18) | |
129 | 0 | ((c3 & 0x3f) << 12) | |
130 | 0 | ((c4 & 0x3f) << 6) | |
131 | 0 | (c5 & 0x3f)); |
132 | 0 | } else { |
133 | 0 | *u = (Unicode)c0; |
134 | 0 | } |
135 | 0 | } else { |
136 | 0 | *u = (Unicode)c0; |
137 | 0 | } |
138 | 0 | return gTrue; |
139 | 0 | } |
140 | | |
141 | 6.62M | GBool getUTF16BE(GString *s, int *i, Unicode *u) { |
142 | 6.62M | int w0, w1; |
143 | | |
144 | 6.62M | if (*i >= s->getLength() - 1) { |
145 | 16.5k | return gFalse; |
146 | 16.5k | } |
147 | 6.61M | w0 = ((s->getChar(*i) & 0xff) << 8) | (s->getChar(*i + 1) & 0xff); |
148 | 6.61M | *i += 2; |
149 | 6.61M | if (w0 < 0xd800 || w0 >= 0xe000) { |
150 | 6.47M | *u = (Unicode)w0; |
151 | 6.47M | } else { |
152 | 135k | if (*i < s->getLength() - 1) { |
153 | 134k | w1 = ((s->getChar(*i) & 0xff) << 8) | (s->getChar(*i + 1) & 0xff); |
154 | 134k | *i += 2; |
155 | 134k | *u = 0x10000 + ((w0 - 0xd800) << 10) + (w1 - 0xdc00); |
156 | 134k | } else { |
157 | 906 | *u = (Unicode)w0; |
158 | 906 | } |
159 | 135k | } |
160 | 6.61M | return gTrue; |
161 | 6.62M | } |
162 | | |
163 | 14.4M | GBool getUTF16LE(GString *s, int *i, Unicode *u) { |
164 | 14.4M | int w0, w1; |
165 | | |
166 | 14.4M | if (*i >= s->getLength() - 1) { |
167 | 1.86k | return gFalse; |
168 | 1.86k | } |
169 | 14.4M | w0 = (s->getChar(*i) & 0xff) | ((s->getChar(*i + 1) & 0xff) << 8); |
170 | 14.4M | *i += 2; |
171 | 14.4M | if (w0 < 0xd800 || w0 >= 0xe000) { |
172 | 14.4M | *u = (Unicode)w0; |
173 | 14.4M | } else { |
174 | 27.7k | if (*i < s->getLength() - 1) { |
175 | 27.5k | w1 = (s->getChar(*i) & 0xff) | ((s->getChar(*i + 1) & 0xff) << 8); |
176 | 27.5k | *i += 2; |
177 | 27.5k | *u = 0x10000 + ((w0 - 0xd800) << 10) + (w1 - 0xdc00); |
178 | 27.5k | } else { |
179 | 217 | *u = (Unicode)w0; |
180 | 217 | } |
181 | 27.7k | } |
182 | 14.4M | return gTrue; |
183 | 14.4M | } |