/src/xpdf-4.06/xpdf/UTF8.cc
Line | Count | Source |
1 | | //======================================================================== |
2 | | // |
3 | | // UTF8.cc |
4 | | // |
5 | | // Copyright 2001-2017 Glyph & Cog, LLC |
6 | | // |
7 | | //======================================================================== |
8 | | |
9 | | #include <aconf.h> |
10 | | #include "UTF8.h" |
11 | | |
12 | 111k | int mapUTF8(Unicode u, char *buf, int bufSize) { |
13 | 111k | if (u <= 0x0000007f) { |
14 | 93.2k | if (bufSize < 1) { |
15 | 0 | return 0; |
16 | 0 | } |
17 | 93.2k | buf[0] = (char)u; |
18 | 93.2k | return 1; |
19 | 93.2k | } else if (u <= 0x000007ff) { |
20 | 15.2k | if (bufSize < 2) { |
21 | 0 | return 0; |
22 | 0 | } |
23 | 15.2k | buf[0] = (char)(0xc0 + (u >> 6)); |
24 | 15.2k | buf[1] = (char)(0x80 + (u & 0x3f)); |
25 | 15.2k | return 2; |
26 | 15.2k | } else if (u <= 0x0000ffff) { |
27 | 3.26k | if (bufSize < 3) { |
28 | 0 | return 0; |
29 | 0 | } |
30 | 3.26k | buf[0] = (char)(0xe0 + (u >> 12)); |
31 | 3.26k | buf[1] = (char)(0x80 + ((u >> 6) & 0x3f)); |
32 | 3.26k | buf[2] = (char)(0x80 + (u & 0x3f)); |
33 | 3.26k | return 3; |
34 | 3.26k | } else if (u <= 0x0010ffff) { |
35 | 24 | if (bufSize < 4) { |
36 | 0 | return 0; |
37 | 0 | } |
38 | 24 | buf[0] = (char)(0xf0 + (u >> 18)); |
39 | 24 | buf[1] = (char)(0x80 + ((u >> 12) & 0x3f)); |
40 | 24 | buf[2] = (char)(0x80 + ((u >> 6) & 0x3f)); |
41 | 24 | buf[3] = (char)(0x80 + (u & 0x3f)); |
42 | 24 | return 4; |
43 | 24 | } else { |
44 | 20 | return 0; |
45 | 20 | } |
46 | 111k | } |
47 | | |
48 | 0 | int mapUCS2(Unicode u, char *buf, int bufSize) { |
49 | 0 | if (u <= 0xffff) { |
50 | 0 | if (bufSize < 2) { |
51 | 0 | return 0; |
52 | 0 | } |
53 | 0 | buf[0] = (char)((u >> 8) & 0xff); |
54 | 0 | buf[1] = (char)(u & 0xff); |
55 | 0 | return 2; |
56 | 0 | } else { |
57 | 0 | return 0; |
58 | 0 | } |
59 | 0 | } |
60 | | |
61 | 18.5k | GBool getUTF8(GString *s, int *i, Unicode *u) { |
62 | 18.5k | Guchar c0, c1, c2, c3, c4, c5; |
63 | | |
64 | 18.5k | if (*i >= s->getLength()) { |
65 | 706 | return gFalse; |
66 | 706 | } |
67 | 17.8k | c0 = (Guchar)s->getChar((*i)++); |
68 | 17.8k | if (c0 < 0x80) { |
69 | 17.8k | *u = (Unicode)c0; |
70 | 17.8k | } else if (c0 < 0xe0) { |
71 | 0 | if (*i < s->getLength() && |
72 | 0 | ((c1 = (Guchar)s->getChar(*i)) & 0xc0) == 0x80) { |
73 | 0 | *i += 1; |
74 | 0 | *u = (Unicode)(((c0 & 0x1f) << 6) | |
75 | 0 | (c1 & 0x3f)); |
76 | 0 | } else { |
77 | 0 | *u = (Unicode)c0; |
78 | 0 | } |
79 | 0 | } else if (c0 < 0xf0) { |
80 | 0 | if (*i < s->getLength() - 1 && |
81 | 0 | ((c1 = (Guchar)s->getChar(*i)) & 0xc0) == 0x80 && |
82 | 0 | ((c2 = (Guchar)s->getChar(*i + 1)) & 0xc0) == 0x80) { |
83 | 0 | *i += 2; |
84 | 0 | *u = (Unicode)(((c0 & 0x0f) << 12) | |
85 | 0 | ((c1 & 0x3f) << 6) | |
86 | 0 | (c2 & 0x3f)); |
87 | 0 | } else { |
88 | 0 | *u = (Unicode)c0; |
89 | 0 | } |
90 | 0 | } else if (c0 < 0xf8) { |
91 | 0 | if (*i < s->getLength() - 2 && |
92 | 0 | ((c1 = (Guchar)s->getChar(*i)) & 0xc0) == 0x80 && |
93 | 0 | ((c2 = (Guchar)s->getChar(*i + 1)) & 0xc0) == 0x80 && |
94 | 0 | ((c3 = (Guchar)s->getChar(*i + 2)) & 0xc0) == 0x80) { |
95 | 0 | *i += 3; |
96 | 0 | *u = (Unicode)(((c0 & 0x07) << 18) | |
97 | 0 | ((c1 & 0x3f) << 12) | |
98 | 0 | ((c2 & 0x3f) << 6) | |
99 | 0 | (c3 & 0x3f)); |
100 | 0 | } else { |
101 | 0 | *u = (Unicode)c0; |
102 | 0 | } |
103 | 0 | } else if (c0 < 0xfc) { |
104 | 0 | if (*i < s->getLength() - 3 && |
105 | 0 | ((c1 = (Guchar)s->getChar(*i)) & 0xc0) == 0x80 && |
106 | 0 | ((c2 = (Guchar)s->getChar(*i + 1)) & 0xc0) == 0x80 && |
107 | 0 | ((c3 = (Guchar)s->getChar(*i + 2)) & 0xc0) == 0x80 && |
108 | 0 | ((c4 = (Guchar)s->getChar(*i + 3)) & 0xc0) == 0x80) { |
109 | 0 | *i += 4; |
110 | 0 | *u = (Unicode)(((c0 & 0x03) << 24) | |
111 | 0 | ((c1 & 0x3f) << 18) | |
112 | 0 | ((c2 & 0x3f) << 12) | |
113 | 0 | ((c3 & 0x3f) << 6) | |
114 | 0 | (c4 & 0x3f)); |
115 | 0 | } else { |
116 | 0 | *u = (Unicode)c0; |
117 | 0 | } |
118 | 0 | } else if (c0 < 0xfe) { |
119 | 0 | if (*i < s->getLength() - 4 && |
120 | 0 | ((c1 = (Guchar)s->getChar(*i)) & 0xc0) == 0x80 && |
121 | 0 | ((c2 = (Guchar)s->getChar(*i + 1)) & 0xc0) == 0x80 && |
122 | 0 | ((c3 = (Guchar)s->getChar(*i + 2)) & 0xc0) == 0x80 && |
123 | 0 | ((c4 = (Guchar)s->getChar(*i + 3)) & 0xc0) == 0x80 && |
124 | 0 | ((c5 = (Guchar)s->getChar(*i + 4)) & 0xc0) == 0x80) { |
125 | 0 | *i += 5; |
126 | 0 | *u = (Unicode)(((c0 & 0x01) << 30) | |
127 | 0 | ((c1 & 0x3f) << 24) | |
128 | 0 | ((c2 & 0x3f) << 18) | |
129 | 0 | ((c3 & 0x3f) << 12) | |
130 | 0 | ((c4 & 0x3f) << 6) | |
131 | 0 | (c5 & 0x3f)); |
132 | 0 | } else { |
133 | 0 | *u = (Unicode)c0; |
134 | 0 | } |
135 | 0 | } else { |
136 | 0 | *u = (Unicode)c0; |
137 | 0 | } |
138 | 17.8k | return gTrue; |
139 | 18.5k | } |
140 | | |
141 | 695k | GBool getUTF16BE(GString *s, int *i, Unicode *u) { |
142 | 695k | int w0, w1; |
143 | | |
144 | 695k | if (*i >= s->getLength() - 1) { |
145 | 5.94k | return gFalse; |
146 | 5.94k | } |
147 | 689k | w0 = ((s->getChar(*i) & 0xff) << 8) | (s->getChar(*i + 1) & 0xff); |
148 | 689k | *i += 2; |
149 | 689k | if (w0 < 0xd800 || w0 >= 0xe000) { |
150 | 688k | *u = (Unicode)w0; |
151 | 688k | } else { |
152 | 1.07k | if (*i < s->getLength() - 1) { |
153 | 1.06k | w1 = ((s->getChar(*i) & 0xff) << 8) | (s->getChar(*i + 1) & 0xff); |
154 | 1.06k | *i += 2; |
155 | 1.06k | *u = 0x10000 + ((w0 - 0xd800) << 10) + (w1 - 0xdc00); |
156 | 1.06k | } else { |
157 | 12 | *u = (Unicode)w0; |
158 | 12 | } |
159 | 1.07k | } |
160 | 689k | return gTrue; |
161 | 695k | } |
162 | | |
163 | 90.9k | GBool getUTF16LE(GString *s, int *i, Unicode *u) { |
164 | 90.9k | int w0, w1; |
165 | | |
166 | 90.9k | if (*i >= s->getLength() - 1) { |
167 | 565 | return gFalse; |
168 | 565 | } |
169 | 90.3k | w0 = (s->getChar(*i) & 0xff) | ((s->getChar(*i + 1) & 0xff) << 8); |
170 | 90.3k | *i += 2; |
171 | 90.3k | if (w0 < 0xd800 || w0 >= 0xe000) { |
172 | 89.4k | *u = (Unicode)w0; |
173 | 89.4k | } else { |
174 | 973 | if (*i < s->getLength() - 1) { |
175 | 928 | w1 = (s->getChar(*i) & 0xff) | ((s->getChar(*i + 1) & 0xff) << 8); |
176 | 928 | *i += 2; |
177 | 928 | *u = 0x10000 + ((w0 - 0xd800) << 10) + (w1 - 0xdc00); |
178 | 928 | } else { |
179 | 45 | *u = (Unicode)w0; |
180 | 45 | } |
181 | 973 | } |
182 | 90.3k | return gTrue; |
183 | 90.9k | } |