/src/xpdf-4.06/xpdf/TextString.cc
Line | Count | Source |
1 | | //======================================================================== |
2 | | // |
3 | | // TextString.cc |
4 | | // |
5 | | // Copyright 2011-2013 Glyph & Cog, LLC |
6 | | // |
7 | | //======================================================================== |
8 | | |
9 | | #include <aconf.h> |
10 | | |
11 | | #include <string.h> |
12 | | #include "gmem.h" |
13 | | #include "gmempp.h" |
14 | | #include "GString.h" |
15 | | #include "PDFDocEncoding.h" |
16 | | #include "UTF8.h" |
17 | | #include "TextString.h" |
18 | | |
19 | | //------------------------------------------------------------------------ |
20 | | |
21 | 7.75k | TextString::TextString() { |
22 | 7.75k | u = NULL; |
23 | 7.75k | len = size = 0; |
24 | 7.75k | } |
25 | | |
26 | 7.58k | TextString::TextString(GString *s) { |
27 | 7.58k | u = NULL; |
28 | 7.58k | len = size = 0; |
29 | 7.58k | append(s); |
30 | 7.58k | } |
31 | | |
32 | 835 | TextString::TextString(TextString *s) { |
33 | 835 | len = size = s->len; |
34 | 835 | if (len) { |
35 | 835 | u = (Unicode *)gmallocn(size, sizeof(Unicode)); |
36 | 835 | memcpy(u, s->u, len * sizeof(Unicode)); |
37 | 835 | } else { |
38 | 0 | u = NULL; |
39 | 0 | } |
40 | 835 | } |
41 | | |
42 | 16.1k | TextString::~TextString() { |
43 | 16.1k | gfree(u); |
44 | 16.1k | } |
45 | | |
46 | 0 | TextString *TextString::append(Unicode c) { |
47 | 0 | expand(1); |
48 | 0 | u[len] = c; |
49 | 0 | ++len; |
50 | 0 | return this; |
51 | 0 | } |
52 | | |
53 | 7.58k | TextString *TextString::append(GString *s) { |
54 | 7.58k | return insert(len, s); |
55 | 7.58k | } |
56 | | |
57 | 29.8k | TextString *TextString::insert(int idx, Unicode c) { |
58 | 29.8k | if (idx >= 0 && idx <= len) { |
59 | 29.8k | expand(1); |
60 | 29.8k | if (idx < len) { |
61 | 29.8k | memmove(u + idx + 1, u + idx, (len - idx) * sizeof(Unicode)); |
62 | 29.8k | } |
63 | 29.8k | u[idx] = c; |
64 | 29.8k | ++len; |
65 | 29.8k | } |
66 | 29.8k | return this; |
67 | 29.8k | } |
68 | | |
69 | 217k | TextString *TextString::insert(int idx, Unicode *u2, int n) { |
70 | 217k | if (idx >= 0 && idx <= len) { |
71 | 217k | expand(n); |
72 | 217k | if (idx < len) { |
73 | 143k | memmove(u + idx + n, u + idx, (len - idx) * sizeof(Unicode)); |
74 | 143k | } |
75 | 217k | memcpy(u + idx, u2, n * sizeof(Unicode)); |
76 | 217k | len += n; |
77 | 217k | } |
78 | 217k | return this; |
79 | 217k | } |
80 | | |
81 | 38.1k | TextString *TextString::insert(int idx, GString *s) { |
82 | 38.1k | Unicode uBuf[100]; |
83 | 38.1k | int n, i; |
84 | | |
85 | 38.1k | if (idx >= 0 && idx <= len) { |
86 | | // look for a UTF-16BE BOM |
87 | 38.1k | if ((s->getChar(0) & 0xff) == 0xfe && |
88 | 24.5k | (s->getChar(1) & 0xff) == 0xff) { |
89 | 24.0k | i = 2; |
90 | 24.0k | n = 0; |
91 | 3.94M | while (getUTF16BE(s, &i, uBuf + n)) { |
92 | 3.92M | ++n; |
93 | 3.92M | if (n == sizeof(uBuf) / sizeof(Unicode)) { |
94 | 33.5k | insert(idx, uBuf, n); |
95 | 33.5k | idx += n; |
96 | 33.5k | n = 0; |
97 | 33.5k | } |
98 | 3.92M | } |
99 | 24.0k | if (n > 0) { |
100 | 23.7k | insert(idx, uBuf, n); |
101 | 23.7k | } |
102 | | |
103 | | // look for a UTF-16LE BOM |
104 | | // (technically, this isn't allowed by the PDF spec, but some |
105 | | // PDF files use it) |
106 | 24.0k | } else if ((s->getChar(0) & 0xff) == 0xff && |
107 | 2.45k | (s->getChar(1) & 0xff) == 0xfe) { |
108 | 2.08k | i = 2; |
109 | 2.08k | n = 0; |
110 | 10.6M | while (getUTF16LE(s, &i, uBuf + n)) { |
111 | 10.6M | ++n; |
112 | 10.6M | if (n == sizeof(uBuf) / sizeof(Unicode)) { |
113 | 105k | insert(idx, uBuf, n); |
114 | 105k | idx += n; |
115 | 105k | n = 0; |
116 | 105k | } |
117 | 10.6M | } |
118 | 2.08k | if (n > 0) { |
119 | 1.87k | insert(idx, uBuf, n); |
120 | 1.87k | } |
121 | | |
122 | | // look for a UTF-8 BOM |
123 | 11.9k | } else if ((s->getChar(0) & 0xff) == 0xef && |
124 | 1.79k | (s->getChar(1) & 0xff) == 0xbb && |
125 | 1.45k | (s->getChar(2) & 0xff) == 0xbf) { |
126 | 1.20k | i = 3; |
127 | 1.20k | n = 0; |
128 | 5.14M | while (getUTF8(s, &i, uBuf + n)) { |
129 | 5.14M | ++n; |
130 | 5.14M | if (n == sizeof(uBuf) / sizeof(Unicode)) { |
131 | 50.8k | insert(idx, uBuf, n); |
132 | 50.8k | idx += n; |
133 | 50.8k | n = 0; |
134 | 50.8k | } |
135 | 5.14M | } |
136 | 1.20k | if (n > 0) { |
137 | 1.15k | insert(idx, uBuf, n); |
138 | 1.15k | } |
139 | | |
140 | | // otherwise, use PDFDocEncoding |
141 | 10.7k | } else { |
142 | 10.7k | n = s->getLength(); |
143 | 10.7k | expand(n); |
144 | 10.7k | if (idx < len) { |
145 | 8.71k | memmove(u + idx + n, u + idx, (len - idx) * sizeof(Unicode)); |
146 | 8.71k | } |
147 | 26.7M | for (i = 0; i < n; ++i) { |
148 | 26.7M | u[idx + i] = pdfDocEncoding[s->getChar(i) & 0xff]; |
149 | 26.7M | } |
150 | 10.7k | len += n; |
151 | 10.7k | } |
152 | 38.1k | } |
153 | 38.1k | return this; |
154 | 38.1k | } |
155 | | |
156 | 257k | void TextString::expand(int delta) { |
157 | 257k | int newLen; |
158 | | |
159 | 257k | newLen = len + delta; |
160 | 257k | if (delta > INT_MAX - len) { |
161 | | // trigger an out-of-memory error |
162 | 0 | size = -1; |
163 | 257k | } else if (newLen <= size) { |
164 | 233k | return; |
165 | 233k | } else if (size > 0 && size <= INT_MAX / 2 && size*2 >= newLen) { |
166 | 14.8k | size *= 2; |
167 | 14.8k | } else { |
168 | 9.06k | size = newLen; |
169 | 9.06k | } |
170 | 23.9k | u = (Unicode *)greallocn(u, size, sizeof(Unicode)); |
171 | 23.9k | } |
172 | | |
173 | 0 | GString *TextString::toPDFTextString() { |
174 | 0 | GString *s; |
175 | 0 | GBool useUnicode; |
176 | 0 | int i; |
177 | |
|
178 | 0 | useUnicode = gFalse; |
179 | 0 | for (i = 0; i < len; ++i) { |
180 | 0 | if (u[i] >= 0x80) { |
181 | 0 | useUnicode = gTrue; |
182 | 0 | break; |
183 | 0 | } |
184 | 0 | } |
185 | 0 | s = new GString(); |
186 | 0 | if (useUnicode) { |
187 | 0 | s->append((char)0xfe); |
188 | 0 | s->append((char)0xff); |
189 | 0 | for (i = 0; i < len; ++i) { |
190 | 0 | s->append((char)(u[i] >> 8)); |
191 | 0 | s->append((char)u[i]); |
192 | 0 | } |
193 | 0 | } else { |
194 | 0 | for (i = 0; i < len; ++i) { |
195 | 0 | s->append((char)u[i]); |
196 | 0 | } |
197 | 0 | } |
198 | 0 | return s; |
199 | 0 | } |
200 | | |
201 | 152 | GString *TextString::toUTF8() { |
202 | 152 | GString *s = new GString(); |
203 | 65.1k | for (int i = 0; i < len; ++i) { |
204 | 65.0k | char buf[8]; |
205 | 65.0k | int n = mapUTF8(u[i], buf, sizeof(buf)); |
206 | 65.0k | s->append(buf, n); |
207 | 65.0k | } |
208 | 152 | return s; |
209 | 152 | } |