/src/xpdf-4.05/xpdf/TextString.cc
Line | Count | Source (jump to first uncovered line) |
1 | | //======================================================================== |
2 | | // |
3 | | // TextString.cc |
4 | | // |
5 | | // Copyright 2011-2013 Glyph & Cog, LLC |
6 | | // |
7 | | //======================================================================== |
8 | | |
9 | | #include <aconf.h> |
10 | | |
11 | | #include <string.h> |
12 | | #include "gmem.h" |
13 | | #include "gmempp.h" |
14 | | #include "GString.h" |
15 | | #include "PDFDocEncoding.h" |
16 | | #include "UTF8.h" |
17 | | #include "TextString.h" |
18 | | |
19 | | //------------------------------------------------------------------------ |
20 | | |
21 | 42.7k | TextString::TextString() { |
22 | 42.7k | u = NULL; |
23 | 42.7k | len = size = 0; |
24 | 42.7k | } |
25 | | |
26 | 11.1k | TextString::TextString(GString *s) { |
27 | 11.1k | u = NULL; |
28 | 11.1k | len = size = 0; |
29 | 11.1k | append(s); |
30 | 11.1k | } |
31 | | |
32 | 13.7k | TextString::TextString(TextString *s) { |
33 | 13.7k | len = size = s->len; |
34 | 13.7k | if (len) { |
35 | 12.0k | u = (Unicode *)gmallocn(size, sizeof(Unicode)); |
36 | 12.0k | memcpy(u, s->u, len * sizeof(Unicode)); |
37 | 12.0k | } else { |
38 | 1.71k | u = NULL; |
39 | 1.71k | } |
40 | 13.7k | } |
41 | | |
42 | 67.6k | TextString::~TextString() { |
43 | 67.6k | gfree(u); |
44 | 67.6k | } |
45 | | |
46 | 0 | TextString *TextString::append(Unicode c) { |
47 | 0 | expand(1); |
48 | 0 | u[len] = c; |
49 | 0 | ++len; |
50 | 0 | return this; |
51 | 0 | } |
52 | | |
53 | 11.1k | TextString *TextString::append(GString *s) { |
54 | 11.1k | return insert(len, s); |
55 | 11.1k | } |
56 | | |
57 | 23.8k | TextString *TextString::insert(int idx, Unicode c) { |
58 | 23.8k | if (idx >= 0 && idx <= len) { |
59 | 23.8k | expand(1); |
60 | 23.8k | if (idx < len) { |
61 | 23.8k | memmove(u + idx + 1, u + idx, (len - idx) * sizeof(Unicode)); |
62 | 23.8k | } |
63 | 23.8k | u[idx] = c; |
64 | 23.8k | ++len; |
65 | 23.8k | } |
66 | 23.8k | return this; |
67 | 23.8k | } |
68 | | |
69 | 206k | TextString *TextString::insert(int idx, Unicode *u2, int n) { |
70 | 206k | if (idx >= 0 && idx <= len) { |
71 | 206k | expand(n); |
72 | 206k | if (idx < len) { |
73 | 171k | memmove(u + idx + n, u + idx, (len - idx) * sizeof(Unicode)); |
74 | 171k | } |
75 | 206k | memcpy(u + idx, u2, n * sizeof(Unicode)); |
76 | 206k | len += n; |
77 | 206k | } |
78 | 206k | return this; |
79 | 206k | } |
80 | | |
81 | 35.9k | TextString *TextString::insert(int idx, GString *s) { |
82 | 35.9k | Unicode uBuf[100]; |
83 | 35.9k | int n, i; |
84 | | |
85 | 35.9k | if (idx >= 0 && idx <= len) { |
86 | | // look for a UTF-16BE BOM |
87 | 35.9k | if ((s->getChar(0) & 0xff) == 0xfe && |
88 | 35.9k | (s->getChar(1) & 0xff) == 0xff) { |
89 | 25.0k | i = 2; |
90 | 25.0k | n = 0; |
91 | 5.22M | while (getUTF16BE(s, &i, uBuf + n)) { |
92 | 5.19M | ++n; |
93 | 5.19M | if (n == sizeof(uBuf) / sizeof(Unicode)) { |
94 | 48.4k | insert(idx, uBuf, n); |
95 | 48.4k | idx += n; |
96 | 48.4k | n = 0; |
97 | 48.4k | } |
98 | 5.19M | } |
99 | 25.0k | if (n > 0) { |
100 | 24.7k | insert(idx, uBuf, n); |
101 | 24.7k | } |
102 | | |
103 | | // look for a UTF-16LE BOM |
104 | | // (technically, this isn't allowed by the PDF spec, but some |
105 | | // PDF files use it) |
106 | 25.0k | } else if ((s->getChar(0) & 0xff) == 0xff && |
107 | 10.8k | (s->getChar(1) & 0xff) == 0xfe) { |
108 | 1.74k | i = 2; |
109 | 1.74k | n = 0; |
110 | 13.2M | while (getUTF16LE(s, &i, uBuf + n)) { |
111 | 13.2M | ++n; |
112 | 13.2M | if (n == sizeof(uBuf) / sizeof(Unicode)) { |
113 | 132k | insert(idx, uBuf, n); |
114 | 132k | idx += n; |
115 | 132k | n = 0; |
116 | 132k | } |
117 | 13.2M | } |
118 | 1.74k | if (n > 0) { |
119 | 1.45k | insert(idx, uBuf, n); |
120 | 1.45k | } |
121 | | |
122 | | // look for a UTF-8 BOM |
123 | 9.15k | } else if ((s->getChar(0) & 0xff) == 0xef && |
124 | 9.15k | (s->getChar(1) & 0xff) == 0xbb && |
125 | 9.15k | (s->getChar(2) & 0xff) == 0xbf) { |
126 | 0 | i = 3; |
127 | 0 | n = 0; |
128 | 0 | while (getUTF8(s, &i, uBuf + n)) { |
129 | 0 | ++n; |
130 | 0 | if (n == sizeof(uBuf) / sizeof(Unicode)) { |
131 | 0 | insert(idx, uBuf, n); |
132 | 0 | idx += n; |
133 | 0 | n = 0; |
134 | 0 | } |
135 | 0 | } |
136 | 0 | if (n > 0) { |
137 | 0 | insert(idx, uBuf, n); |
138 | 0 | } |
139 | | |
140 | | // otherwise, use PDFDocEncoding |
141 | 9.15k | } else { |
142 | 9.15k | n = s->getLength(); |
143 | 9.15k | expand(n); |
144 | 9.15k | if (idx < len) { |
145 | 4.59k | memmove(u + idx + n, u + idx, (len - idx) * sizeof(Unicode)); |
146 | 4.59k | } |
147 | 5.28M | for (i = 0; i < n; ++i) { |
148 | 5.27M | u[idx + i] = pdfDocEncoding[s->getChar(i) & 0xff]; |
149 | 5.27M | } |
150 | 9.15k | len += n; |
151 | 9.15k | } |
152 | 35.9k | } |
153 | 35.9k | return this; |
154 | 35.9k | } |
155 | | |
156 | 239k | void TextString::expand(int delta) { |
157 | 239k | int newLen; |
158 | | |
159 | 239k | newLen = len + delta; |
160 | 239k | if (delta > INT_MAX - len) { |
161 | | // trigger an out-of-memory error |
162 | 0 | size = -1; |
163 | 239k | } else if (newLen <= size) { |
164 | 207k | return; |
165 | 207k | } else if (size > 0 && size <= INT_MAX / 2 && size*2 >= newLen) { |
166 | 18.7k | size *= 2; |
167 | 18.7k | } else { |
168 | 13.3k | size = newLen; |
169 | 13.3k | } |
170 | 32.1k | u = (Unicode *)greallocn(u, size, sizeof(Unicode)); |
171 | 32.1k | } |
172 | | |
173 | 0 | GString *TextString::toPDFTextString() { |
174 | 0 | GString *s; |
175 | 0 | GBool useUnicode; |
176 | 0 | int i; |
177 | |
|
178 | 0 | useUnicode = gFalse; |
179 | 0 | for (i = 0; i < len; ++i) { |
180 | 0 | if (u[i] >= 0x80) { |
181 | 0 | useUnicode = gTrue; |
182 | 0 | break; |
183 | 0 | } |
184 | 0 | } |
185 | 0 | s = new GString(); |
186 | 0 | if (useUnicode) { |
187 | 0 | s->append((char)0xfe); |
188 | 0 | s->append((char)0xff); |
189 | 0 | for (i = 0; i < len; ++i) { |
190 | 0 | s->append((char)(u[i] >> 8)); |
191 | 0 | s->append((char)u[i]); |
192 | 0 | } |
193 | 0 | } else { |
194 | 0 | for (i = 0; i < len; ++i) { |
195 | 0 | s->append((char)u[i]); |
196 | 0 | } |
197 | 0 | } |
198 | 0 | return s; |
199 | 0 | } |
200 | | |
201 | 175 | GString *TextString::toUTF8() { |
202 | 175 | GString *s = new GString(); |
203 | 12.6k | for (int i = 0; i < len; ++i) { |
204 | 12.4k | char buf[8]; |
205 | 12.4k | int n = mapUTF8(u[i], buf, sizeof(buf)); |
206 | 12.4k | s->append(buf, n); |
207 | 12.4k | } |
208 | 175 | return s; |
209 | 175 | } |