/src/xpdf-4.06/xpdf/TextString.cc
Line | Count | Source |
1 | | //======================================================================== |
2 | | // |
3 | | // TextString.cc |
4 | | // |
5 | | // Copyright 2011-2013 Glyph & Cog, LLC |
6 | | // |
7 | | //======================================================================== |
8 | | |
9 | | #include <aconf.h> |
10 | | |
11 | | #include <string.h> |
12 | | #include "gmem.h" |
13 | | #include "gmempp.h" |
14 | | #include "GString.h" |
15 | | #include "PDFDocEncoding.h" |
16 | | #include "UTF8.h" |
17 | | #include "TextString.h" |
18 | | |
19 | | //------------------------------------------------------------------------ |
20 | | |
21 | 89.3k | TextString::TextString() { |
22 | 89.3k | u = NULL; |
23 | 89.3k | len = size = 0; |
24 | 89.3k | } |
25 | | |
26 | 15.0k | TextString::TextString(GString *s) { |
27 | 15.0k | u = NULL; |
28 | 15.0k | len = size = 0; |
29 | 15.0k | append(s); |
30 | 15.0k | } |
31 | | |
32 | 1.22k | TextString::TextString(TextString *s) { |
33 | 1.22k | len = size = s->len; |
34 | 1.22k | if (len) { |
35 | 509 | u = (Unicode *)gmallocn(size, sizeof(Unicode)); |
36 | 509 | memcpy(u, s->u, len * sizeof(Unicode)); |
37 | 716 | } else { |
38 | 716 | u = NULL; |
39 | 716 | } |
40 | 1.22k | } |
41 | | |
42 | 105k | TextString::~TextString() { |
43 | 105k | gfree(u); |
44 | 105k | } |
45 | | |
46 | 0 | TextString *TextString::append(Unicode c) { |
47 | 0 | expand(1); |
48 | 0 | u[len] = c; |
49 | 0 | ++len; |
50 | 0 | return this; |
51 | 0 | } |
52 | | |
53 | 15.0k | TextString *TextString::append(GString *s) { |
54 | 15.0k | return insert(len, s); |
55 | 15.0k | } |
56 | | |
57 | 28.6k | TextString *TextString::insert(int idx, Unicode c) { |
58 | 28.6k | if (idx >= 0 && idx <= len) { |
59 | 28.6k | expand(1); |
60 | 28.6k | if (idx < len) { |
61 | 28.6k | memmove(u + idx + 1, u + idx, (len - idx) * sizeof(Unicode)); |
62 | 28.6k | } |
63 | 28.6k | u[idx] = c; |
64 | 28.6k | ++len; |
65 | 28.6k | } |
66 | 28.6k | return this; |
67 | 28.6k | } |
68 | | |
69 | 195k | TextString *TextString::insert(int idx, Unicode *u2, int n) { |
70 | 195k | if (idx >= 0 && idx <= len) { |
71 | 195k | expand(n); |
72 | 195k | if (idx < len) { |
73 | 130k | memmove(u + idx + n, u + idx, (len - idx) * sizeof(Unicode)); |
74 | 130k | } |
75 | 195k | memcpy(u + idx, u2, n * sizeof(Unicode)); |
76 | 195k | len += n; |
77 | 195k | } |
78 | 195k | return this; |
79 | 195k | } |
80 | | |
81 | 44.3k | TextString *TextString::insert(int idx, GString *s) { |
82 | 44.3k | Unicode uBuf[100]; |
83 | 44.3k | int n, i; |
84 | | |
85 | 44.3k | if (idx >= 0 && idx <= len) { |
86 | | // look for a UTF-16BE BOM |
87 | 44.3k | if ((s->getChar(0) & 0xff) == 0xfe && |
88 | 23.6k | (s->getChar(1) & 0xff) == 0xff) { |
89 | 23.2k | i = 2; |
90 | 23.2k | n = 0; |
91 | 3.30M | while (getUTF16BE(s, &i, uBuf + n)) { |
92 | 3.27M | ++n; |
93 | 3.27M | if (n == sizeof(uBuf) / sizeof(Unicode)) { |
94 | 27.5k | insert(idx, uBuf, n); |
95 | 27.5k | idx += n; |
96 | 27.5k | n = 0; |
97 | 27.5k | } |
98 | 3.27M | } |
99 | 23.2k | if (n > 0) { |
100 | 22.8k | insert(idx, uBuf, n); |
101 | 22.8k | } |
102 | | |
103 | | // look for a UTF-16LE BOM |
104 | | // (technically, this isn't allowed by the PDF spec, but some |
105 | | // PDF files use it) |
106 | 23.2k | } else if ((s->getChar(0) & 0xff) == 0xff && |
107 | 4.14k | (s->getChar(1) & 0xff) == 0xfe) { |
108 | 3.82k | i = 2; |
109 | 3.82k | n = 0; |
110 | 10.0M | while (getUTF16LE(s, &i, uBuf + n)) { |
111 | 9.99M | ++n; |
112 | 9.99M | if (n == sizeof(uBuf) / sizeof(Unicode)) { |
113 | 99.1k | insert(idx, uBuf, n); |
114 | 99.1k | idx += n; |
115 | 99.1k | n = 0; |
116 | 99.1k | } |
117 | 9.99M | } |
118 | 3.82k | if (n > 0) { |
119 | 1.91k | insert(idx, uBuf, n); |
120 | 1.91k | } |
121 | | |
122 | | // look for a UTF-8 BOM |
123 | 17.2k | } else if ((s->getChar(0) & 0xff) == 0xef && |
124 | 1.50k | (s->getChar(1) & 0xff) == 0xbb && |
125 | 1.32k | (s->getChar(2) & 0xff) == 0xbf) { |
126 | 1.06k | i = 3; |
127 | 1.06k | n = 0; |
128 | 4.33M | while (getUTF8(s, &i, uBuf + n)) { |
129 | 4.33M | ++n; |
130 | 4.33M | if (n == sizeof(uBuf) / sizeof(Unicode)) { |
131 | 42.7k | insert(idx, uBuf, n); |
132 | 42.7k | idx += n; |
133 | 42.7k | n = 0; |
134 | 42.7k | } |
135 | 4.33M | } |
136 | 1.06k | if (n > 0) { |
137 | 986 | insert(idx, uBuf, n); |
138 | 986 | } |
139 | | |
140 | | // otherwise, use PDFDocEncoding |
141 | 16.1k | } else { |
142 | 16.1k | n = s->getLength(); |
143 | 16.1k | expand(n); |
144 | 16.1k | if (idx < len) { |
145 | 7.85k | memmove(u + idx + n, u + idx, (len - idx) * sizeof(Unicode)); |
146 | 7.85k | } |
147 | 24.0M | for (i = 0; i < n; ++i) { |
148 | 24.0M | u[idx + i] = pdfDocEncoding[s->getChar(i) & 0xff]; |
149 | 24.0M | } |
150 | 16.1k | len += n; |
151 | 16.1k | } |
152 | 44.3k | } |
153 | 44.3k | return this; |
154 | 44.3k | } |
155 | | |
156 | 240k | void TextString::expand(int delta) { |
157 | 240k | int newLen; |
158 | | |
159 | 240k | newLen = len + delta; |
160 | 240k | if (delta > INT_MAX - len) { |
161 | | // trigger an out-of-memory error |
162 | 0 | size = -1; |
163 | 240k | } else if (newLen <= size) { |
164 | 212k | return; |
165 | 212k | } else if (size > 0 && size <= INT_MAX / 2 && size*2 >= newLen) { |
166 | 14.1k | size *= 2; |
167 | 14.1k | } else { |
168 | 13.0k | size = newLen; |
169 | 13.0k | } |
170 | 27.2k | u = (Unicode *)greallocn(u, size, sizeof(Unicode)); |
171 | 27.2k | } |
172 | | |
173 | 0 | GString *TextString::toPDFTextString() { |
174 | 0 | GString *s; |
175 | 0 | GBool useUnicode; |
176 | 0 | int i; |
177 | |
|
178 | 0 | useUnicode = gFalse; |
179 | 0 | for (i = 0; i < len; ++i) { |
180 | 0 | if (u[i] >= 0x80) { |
181 | 0 | useUnicode = gTrue; |
182 | 0 | break; |
183 | 0 | } |
184 | 0 | } |
185 | 0 | s = new GString(); |
186 | 0 | if (useUnicode) { |
187 | 0 | s->append((char)0xfe); |
188 | 0 | s->append((char)0xff); |
189 | 0 | for (i = 0; i < len; ++i) { |
190 | 0 | s->append((char)(u[i] >> 8)); |
191 | 0 | s->append((char)u[i]); |
192 | 0 | } |
193 | 0 | } else { |
194 | 0 | for (i = 0; i < len; ++i) { |
195 | 0 | s->append((char)u[i]); |
196 | 0 | } |
197 | 0 | } |
198 | 0 | return s; |
199 | 0 | } |
200 | | |
201 | 123 | GString *TextString::toUTF8() { |
202 | 123 | GString *s = new GString(); |
203 | 56.5k | for (int i = 0; i < len; ++i) { |
204 | 56.4k | char buf[8]; |
205 | 56.4k | int n = mapUTF8(u[i], buf, sizeof(buf)); |
206 | 56.4k | s->append(buf, n); |
207 | 56.4k | } |
208 | 123 | return s; |
209 | 123 | } |