/src/mupdf/source/fitz/text.c
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (C) 2004-2024 Artifex Software, Inc. |
2 | | // |
3 | | // This file is part of MuPDF. |
4 | | // |
5 | | // MuPDF is free software: you can redistribute it and/or modify it under the |
6 | | // terms of the GNU Affero General Public License as published by the Free |
7 | | // Software Foundation, either version 3 of the License, or (at your option) |
8 | | // any later version. |
9 | | // |
10 | | // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY |
11 | | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
12 | | // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more |
13 | | // details. |
14 | | // |
15 | | // You should have received a copy of the GNU Affero General Public License |
16 | | // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> |
17 | | // |
18 | | // Alternative licensing terms are available from the licensor. |
19 | | // For commercial licensing, see <https://www.artifex.com/> or contact |
20 | | // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, |
21 | | // CA 94129, USA, for further information. |
22 | | |
23 | | #include "mupdf/fitz.h" |
24 | | |
25 | | #include <string.h> |
26 | | |
27 | | fz_text * |
28 | | fz_new_text(fz_context *ctx) |
29 | 156k | { |
30 | 156k | fz_text *text = fz_malloc_struct(ctx, fz_text); |
31 | 156k | text->refs = 1; |
32 | 156k | return text; |
33 | 156k | } |
34 | | |
35 | | fz_text * |
36 | | fz_keep_text(fz_context *ctx, const fz_text *textc) |
37 | 936 | { |
38 | 936 | fz_text *text = (fz_text *)textc; /* Explicit cast away of const */ |
39 | | |
40 | 936 | return fz_keep_imp(ctx, text, &text->refs); |
41 | 936 | } |
42 | | |
43 | | void |
44 | | fz_drop_text(fz_context *ctx, const fz_text *textc) |
45 | 188k | { |
46 | 188k | fz_text *text = (fz_text *)textc; /* Explicit cast away of const */ |
47 | | |
48 | 188k | if (fz_drop_imp(ctx, text, &text->refs)) |
49 | 156k | { |
50 | 156k | fz_text_span *span = text->head; |
51 | 448k | while (span) |
52 | 292k | { |
53 | 292k | fz_text_span *next = span->next; |
54 | 292k | fz_drop_font(ctx, span->font); |
55 | 292k | fz_free(ctx, span->items); |
56 | 292k | fz_free(ctx, span); |
57 | 292k | span = next; |
58 | 292k | } |
59 | 156k | fz_free(ctx, text); |
60 | 156k | } |
61 | 188k | } |
62 | | |
63 | | static fz_text_span * |
64 | | fz_new_text_span(fz_context *ctx, fz_font *font, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language, fz_matrix trm) |
65 | 292k | { |
66 | 292k | fz_text_span *span = fz_malloc_struct(ctx, fz_text_span); |
67 | 292k | span->font = fz_keep_font(ctx, font); |
68 | 292k | span->wmode = wmode; |
69 | 292k | span->bidi_level = bidi_level; |
70 | 292k | span->markup_dir = markup_dir; |
71 | 292k | span->language = language; |
72 | 292k | span->trm = trm; |
73 | 292k | span->trm.e = 0; |
74 | 292k | span->trm.f = 0; |
75 | 292k | return span; |
76 | 292k | } |
77 | | |
78 | | static fz_text_span * |
79 | | fz_add_text_span(fz_context *ctx, fz_text *text, fz_font *font, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language, fz_matrix trm) |
80 | 5.91M | { |
81 | 5.91M | if (!text->tail) |
82 | 156k | { |
83 | 156k | text->head = text->tail = fz_new_text_span(ctx, font, wmode, bidi_level, markup_dir, language, trm); |
84 | 156k | } |
85 | 5.76M | else if (text->tail->font != font || |
86 | 5.76M | text->tail->wmode != wmode || |
87 | 5.76M | text->tail->bidi_level != bidi_level || |
88 | 5.76M | text->tail->markup_dir != markup_dir || |
89 | 5.76M | text->tail->language != language || |
90 | 5.76M | text->tail->trm.a != trm.a || |
91 | 5.76M | text->tail->trm.b != trm.b || |
92 | 5.76M | text->tail->trm.c != trm.c || |
93 | 5.76M | text->tail->trm.d != trm.d) |
94 | 135k | { |
95 | 135k | text->tail = text->tail->next = fz_new_text_span(ctx, font, wmode, bidi_level, markup_dir, language, trm); |
96 | 135k | } |
97 | 5.91M | return text->tail; |
98 | 5.91M | } |
99 | | |
100 | | static void |
101 | | fz_grow_text_span(fz_context *ctx, fz_text_span *span, int n) |
102 | 5.91M | { |
103 | 5.91M | int new_cap = span->cap; |
104 | 5.91M | if (span->len + n < new_cap) |
105 | 5.38M | return; |
106 | 945k | while (span->len + n > new_cap) |
107 | 412k | new_cap = new_cap + 36; |
108 | 532k | span->items = fz_realloc_array(ctx, span->items, new_cap, fz_text_item); |
109 | 532k | span->cap = new_cap; |
110 | 532k | } |
111 | | |
112 | | void |
113 | | fz_show_glyph_aux(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix trm, float adv, int gid, int ucs, int cid, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language lang) |
114 | 5.91M | { |
115 | 5.91M | fz_text_span *span; |
116 | | |
117 | 5.91M | if (text->refs != 1) |
118 | 0 | fz_throw(ctx, FZ_ERROR_ARGUMENT, "cannot modify shared text objects"); |
119 | | |
120 | 5.91M | span = fz_add_text_span(ctx, text, font, wmode, bidi_level, markup_dir, lang, trm); |
121 | | |
122 | 5.91M | fz_grow_text_span(ctx, span, 1); |
123 | | |
124 | 5.91M | span->items[span->len].ucs = ucs; |
125 | 5.91M | span->items[span->len].gid = gid; |
126 | 5.91M | span->items[span->len].cid = cid; |
127 | 5.91M | span->items[span->len].x = trm.e; |
128 | 5.91M | span->items[span->len].y = trm.f; |
129 | 5.91M | span->items[span->len].adv = adv; |
130 | 5.91M | span->len++; |
131 | 5.91M | } |
132 | | |
133 | | void |
134 | | fz_show_glyph(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix trm, int gid, int ucs, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language lang) |
135 | 91.9k | { |
136 | 91.9k | float adv = (gid >= 0) ? fz_advance_glyph(ctx, font, gid, wmode) : 0; |
137 | 91.9k | fz_show_glyph_aux(ctx, text, font, trm, adv, gid, ucs, ucs, wmode, bidi_level, markup_dir, lang); |
138 | 91.9k | } |
139 | | |
140 | | fz_matrix |
141 | | fz_show_string(fz_context *ctx, fz_text *text, fz_font *user_font, fz_matrix trm, const char *s, |
142 | | int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language) |
143 | 295 | { |
144 | 295 | fz_font *font; |
145 | 295 | int gid, ucs; |
146 | 295 | float adv; |
147 | | |
148 | 61.8k | while (*s) |
149 | 61.5k | { |
150 | 61.5k | s += fz_chartorune(&ucs, s); |
151 | 61.5k | gid = fz_encode_character_with_fallback(ctx, user_font, ucs, 0, language, &font); |
152 | 61.5k | if (gid >= 0) |
153 | 61.5k | adv = fz_advance_glyph(ctx, font, gid, wmode); |
154 | 0 | else |
155 | 0 | adv = 0; |
156 | 61.5k | fz_show_glyph_aux(ctx, text, font, trm, adv, gid, ucs, ucs, wmode, bidi_level, markup_dir, language); |
157 | 61.5k | if (wmode == 0) |
158 | 61.5k | trm = fz_pre_translate(trm, adv, 0); |
159 | 0 | else |
160 | 0 | trm = fz_pre_translate(trm, 0, -adv); |
161 | 61.5k | } |
162 | | |
163 | 295 | return trm; |
164 | 295 | } |
165 | | |
166 | | fz_matrix |
167 | | fz_measure_string(fz_context *ctx, fz_font *user_font, fz_matrix trm, const char *s, |
168 | | int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language) |
169 | 0 | { |
170 | 0 | fz_font *font; |
171 | 0 | int gid, ucs; |
172 | 0 | float adv; |
173 | |
|
174 | 0 | while (*s) |
175 | 0 | { |
176 | 0 | s += fz_chartorune(&ucs, s); |
177 | 0 | gid = fz_encode_character_with_fallback(ctx, user_font, ucs, 0, language, &font); |
178 | 0 | adv = fz_advance_glyph(ctx, font, gid, wmode); |
179 | 0 | if (wmode == 0) |
180 | 0 | trm = fz_pre_translate(trm, adv, 0); |
181 | 0 | else |
182 | 0 | trm = fz_pre_translate(trm, 0, -adv); |
183 | 0 | } |
184 | |
|
185 | 0 | return trm; |
186 | 0 | } |
187 | | |
188 | | fz_rect |
189 | | fz_bound_text(fz_context *ctx, const fz_text *text, const fz_stroke_state *stroke, fz_matrix ctm) |
190 | 2.65k | { |
191 | 2.65k | fz_text_span *span; |
192 | 2.65k | fz_matrix tm, trm; |
193 | 2.65k | fz_rect gbox; |
194 | 2.65k | fz_rect bbox; |
195 | 2.65k | int i; |
196 | | |
197 | 2.65k | bbox = fz_empty_rect; |
198 | | |
199 | 5.43k | for (span = text->head; span; span = span->next) |
200 | 2.78k | { |
201 | 2.78k | if (span->len > 0) |
202 | 2.78k | { |
203 | 2.78k | tm = span->trm; |
204 | 248k | for (i = 0; i < span->len; i++) |
205 | 245k | { |
206 | 245k | if (span->items[i].gid >= 0) |
207 | 245k | { |
208 | 245k | tm.e = span->items[i].x; |
209 | 245k | tm.f = span->items[i].y; |
210 | 245k | trm = fz_concat(tm, ctm); |
211 | 245k | gbox = fz_bound_glyph(ctx, span->font, span->items[i].gid, trm); |
212 | 245k | bbox = fz_union_rect(bbox, gbox); |
213 | 245k | } |
214 | 245k | } |
215 | 2.78k | } |
216 | 2.78k | } |
217 | | |
218 | 2.65k | if (!fz_is_empty_rect(bbox)) |
219 | 2.08k | { |
220 | 2.08k | if (stroke) |
221 | 147 | bbox = fz_adjust_rect_for_stroke(ctx, bbox, stroke, ctm); |
222 | | |
223 | | /* Compensate for the glyph cache limited positioning precision */ |
224 | 2.08k | bbox.x0 -= 1; |
225 | 2.08k | bbox.y0 -= 1; |
226 | 2.08k | bbox.x1 += 1; |
227 | 2.08k | bbox.y1 += 1; |
228 | 2.08k | } |
229 | | |
230 | 2.65k | return bbox; |
231 | 2.65k | } |
232 | | |
233 | | fz_text_language fz_text_language_from_string(const char *str) |
234 | 81.4k | { |
235 | 81.4k | fz_text_language lang; |
236 | | |
237 | 81.4k | if (str == NULL) |
238 | 0 | return FZ_LANG_UNSET; |
239 | | |
240 | 81.4k | if (!strcmp(str, "zh-Hant") || |
241 | 81.4k | !strcmp(str, "zh-HK") || |
242 | 81.4k | !strcmp(str, "zh-MO") || |
243 | 81.4k | !strcmp(str, "zh-SG") || |
244 | 81.4k | !strcmp(str, "zh-TW")) |
245 | 0 | return FZ_LANG_zh_Hant; |
246 | 81.4k | if (!strcmp(str, "zh-Hans") || |
247 | 81.4k | !strcmp(str, "zh-CN")) |
248 | 0 | return FZ_LANG_zh_Hans; |
249 | | |
250 | | /* 1st char */ |
251 | 81.4k | if (str[0] >= 'a' && str[0] <= 'z') |
252 | 4.71k | lang = str[0] - 'a' + 1; |
253 | 76.7k | else if (str[0] >= 'A' && str[0] <= 'Z') |
254 | 48 | lang = str[0] - 'A' + 1; |
255 | 76.7k | else |
256 | 76.7k | return 0; |
257 | | |
258 | | /* 2nd char */ |
259 | 4.76k | if (str[1] >= 'a' && str[1] <= 'z') |
260 | 4.71k | lang += 27*(str[1] - 'a' + 1); |
261 | 48 | else if (str[1] >= 'A' && str[1] <= 'Z') |
262 | 48 | lang += 27*(str[1] - 'A' + 1); |
263 | 0 | else |
264 | 0 | return 0; /* There are no valid 1 char language codes */ |
265 | | |
266 | | /* 3nd char */ |
267 | 4.76k | if (str[2] >= 'a' && str[2] <= 'z') |
268 | 0 | lang += 27*27*(str[2] - 'a' + 1); |
269 | 4.76k | else if (str[2] >= 'A' && str[2] <= 'Z') |
270 | 0 | lang += 27*27*(str[2] - 'A' + 1); |
271 | | |
272 | | /* We don't support iso 639-6 4 char codes, cos the standard |
273 | | * has been withdrawn, and no one uses them. */ |
274 | 4.76k | return lang; |
275 | 4.76k | } |
276 | | |
277 | | char *fz_string_from_text_language(char str[8], fz_text_language lang) |
278 | 0 | { |
279 | 0 | int c; |
280 | | |
281 | | /* str is supposed to be at least 8 chars in size */ |
282 | 0 | if (str == NULL) |
283 | 0 | return NULL; |
284 | | |
285 | 0 | if (lang == FZ_LANG_zh_Hant) |
286 | 0 | fz_strlcpy(str, "zh-Hant", 8); |
287 | 0 | else if (lang == FZ_LANG_zh_Hans) |
288 | 0 | fz_strlcpy(str, "zh-Hans", 8); |
289 | 0 | else |
290 | 0 | { |
291 | 0 | c = lang % 27; |
292 | 0 | lang = lang / 27; |
293 | 0 | str[0] = c == 0 ? 0 : c - 1 + 'a'; |
294 | 0 | c = lang % 27; |
295 | 0 | lang = lang / 27; |
296 | 0 | str[1] = c == 0 ? 0 : c - 1 + 'a'; |
297 | 0 | c = lang % 27; |
298 | 0 | str[2] = c == 0 ? 0 : c - 1 + 'a'; |
299 | 0 | str[3] = 0; |
300 | 0 | } |
301 | |
|
302 | 0 | return str; |
303 | 0 | } |