/src/ghostpdl/xps/xpsxml.c
Line | Count | Source |
1 | | /* Copyright (C) 2001-2025 Artifex Software, Inc. |
2 | | All Rights Reserved. |
3 | | |
4 | | This software is provided AS-IS with no warranty, either express or |
5 | | implied. |
6 | | |
7 | | This software is distributed under license and may not be copied, |
8 | | modified or distributed except as expressly authorized under the terms |
9 | | of the license contained in the file LICENSE in this distribution. |
10 | | |
11 | | Refer to licensing information at http://www.artifex.com or contact |
12 | | Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, |
13 | | CA 94129, USA, for further information. |
14 | | */ |
15 | | |
16 | | |
17 | | /* Simple XML document object model on top of Expat. */ |
18 | | |
19 | | #include "ghostxps.h" |
20 | | |
21 | | #include <expat.h> |
22 | | |
23 | | #define XMLBUFLEN 4096 |
24 | | |
25 | 11.5k | #define NS_XPS "http://schemas.microsoft.com/xps/2005/06" |
26 | 11.5k | #define NS_MC "http://schemas.openxmlformats.org/markup-compatibility/2006" |
27 | 11.5k | #define NS_OXPS "http://schemas.openxps.org/oxps/v1.0" |
typedef struct xps_parser_s xps_parser_t;

/*
 * Parse state shared by the Expat callbacks in this file.  A pointer to
 * this structure is registered as the Expat user data and arrives in
 * each callback as its first argument.
 */
struct xps_parser_s
{
    xps_context_t *ctx;     /* context used for allocation and diagnostics */
    xps_item_t *root;       /* first item created; returned to the caller */
    xps_item_t *head;       /* currently open item; new items are linked under it */
    const char *error;      /* non-NULL once a callback has failed; later callbacks do nothing */
    char *base; /* base of relative URIs */
};
38 | | |
/*
 * One node of the parsed document tree.  on_open_tag allocates the
 * node, its attribute pointer array and all of its strings as a single
 * block, so releasing the node releases everything it points at.
 */
struct xps_item_s
{
    char *name;         /* element name; the empty string marks a text node */
    char **atts;        /* NULL-terminated array of alternating name, value strings */
    xps_item_t *up;     /* parent item, NULL for the root */
    xps_item_t *down;   /* first child, NULL if there are none */
    xps_item_t *tail;   /* last child; only assigned once down is non-NULL */
    xps_item_t *next;   /* next sibling, NULL for the last one */
};
48 | | |
/*
 * Return the part of s that follows its first space character, or s
 * itself when it contains no space.  The parser is created with ' ' as
 * the namespace separator, so this drops a leading namespace URI from a
 * qualified name.
 */
static const char *
skip_namespace(const char *s)
{
    const char *sep = strchr(s, ' ');

    return sep != NULL ? sep + 1 : s;
}
57 | | |
58 | | static void |
59 | | on_open_tag(void *zp, const char *ns_name, const char **atts) |
60 | 11.5k | { |
61 | 11.5k | xps_parser_t *parser = zp; |
62 | 11.5k | xps_context_t *ctx = parser->ctx; |
63 | 11.5k | xps_item_t *item; |
64 | 11.5k | xps_item_t *tail; |
65 | 11.5k | int namelen; |
66 | 11.5k | int attslen; |
67 | 11.5k | int textlen; |
68 | 11.5k | const char *name; |
69 | 11.5k | char *p; |
70 | 11.5k | int i; |
71 | | |
72 | 11.5k | if (parser->error) |
73 | 0 | return; |
74 | | |
75 | | /* check namespace */ |
76 | | |
77 | 11.5k | name = NULL; |
78 | | |
79 | 11.5k | p = strstr(ns_name, NS_XPS); |
80 | 11.5k | if (p == ns_name) |
81 | 10.9k | { |
82 | 10.9k | name = strchr(ns_name, ' ') + 1; |
83 | 10.9k | } |
84 | | |
85 | 11.5k | p = strstr(ns_name, NS_MC); |
86 | 11.5k | if (p == ns_name) |
87 | 0 | { |
88 | 0 | name = strchr(ns_name, ' ') + 1; |
89 | 0 | } |
90 | | |
91 | 11.5k | p = strstr(ns_name, NS_OXPS); |
92 | 11.5k | if (p == ns_name) |
93 | 0 | { |
94 | 0 | name = strchr(ns_name, ' ') + 1; |
95 | 0 | } |
96 | | |
97 | 11.5k | if (!name) |
98 | 557 | { |
99 | 557 | dmprintf1(ctx->memory, "unknown namespace: %s\n", ns_name); |
100 | 557 | name = ns_name; |
101 | 557 | } |
102 | | |
103 | | /* count size to alloc */ |
104 | | |
105 | 11.5k | namelen = strlen(name) + 1; /* zero terminated */ |
106 | 11.5k | attslen = sizeof(char*); /* with space for sentinel */ |
107 | 11.5k | textlen = 0; |
108 | 59.3k | for (i = 0; atts[i]; i++) |
109 | 47.8k | { |
110 | 47.8k | attslen += sizeof(char*); |
111 | 47.8k | if ((i & 1) == 0) |
112 | 23.9k | textlen += strlen(skip_namespace(atts[i])) + 1; |
113 | 23.9k | else |
114 | 23.9k | textlen += strlen(atts[i]) + 1; |
115 | 47.8k | } |
116 | | |
117 | 11.5k | item = xps_alloc(ctx, (size_t)sizeof(xps_item_t) + attslen + namelen + textlen); |
118 | 11.5k | if (!item) { |
119 | 0 | parser->error = "out of memory"; |
120 | 0 | gs_throw(gs_error_VMerror, "out of memory.\n"); |
121 | 0 | return; |
122 | 0 | } |
123 | | |
124 | | /* copy strings to new memory */ |
125 | | |
126 | 11.5k | item->atts = (char**) (((char*)item) + sizeof(xps_item_t)); |
127 | 11.5k | item->name = ((char*)item) + sizeof(xps_item_t) + attslen; |
128 | 11.5k | p = ((char*)item) + sizeof(xps_item_t) + attslen + namelen; |
129 | | |
130 | 11.5k | strcpy(item->name, name); |
131 | 59.3k | for (i = 0; atts[i]; i++) |
132 | 47.8k | { |
133 | 47.8k | item->atts[i] = p; |
134 | 47.8k | if ((i & 1) == 0) |
135 | 23.9k | strcpy(item->atts[i], skip_namespace(atts[i])); |
136 | 23.9k | else |
137 | 23.9k | strcpy(item->atts[i], atts[i]); |
138 | 47.8k | p += strlen(p) + 1; |
139 | 47.8k | } |
140 | | |
141 | 11.5k | item->atts[i] = 0; |
142 | | |
143 | | /* link item into tree */ |
144 | | |
145 | 11.5k | item->up = parser->head; |
146 | 11.5k | item->down = NULL; |
147 | 11.5k | item->next = NULL; |
148 | | |
149 | 11.5k | if (!parser->head) |
150 | 20 | { |
151 | 20 | parser->root = item; |
152 | 20 | parser->head = item; |
153 | 20 | return; |
154 | 20 | } |
155 | | |
156 | 11.5k | if (!parser->head->down) |
157 | 40 | { |
158 | 40 | parser->head->down = item; |
159 | 40 | parser->head->tail = item; |
160 | 40 | parser->head = item; |
161 | 40 | return; |
162 | 40 | } |
163 | | |
164 | 11.4k | tail = parser->head->tail; |
165 | 11.4k | tail->next = item; |
166 | 11.4k | parser->head->tail = item; |
167 | 11.4k | parser->head = item; |
168 | 11.4k | } |
169 | | |
170 | | static void |
171 | | on_close_tag(void *zp, const char *name) |
172 | 11.5k | { |
173 | 11.5k | xps_parser_t *parser = zp; |
174 | | |
175 | 11.5k | if (parser->error) |
176 | 0 | return; |
177 | | |
178 | 11.5k | if (parser->head) |
179 | 11.5k | parser->head = parser->head->up; |
180 | 11.5k | } |
181 | | |
/* Return 1 if c is a space, tab, carriage return or line feed, else 0. */
static inline int
is_xml_space(int c)
{
    switch (c)
    {
    case ' ':
    case '\t':
    case '\r':
    case '\n':
        return 1;
    default:
        return 0;
    }
}
187 | | |
188 | | static void |
189 | | on_text(void *zp, char *buf, int len) |
190 | 21.6k | { |
191 | 21.6k | xps_parser_t *parser = zp; |
192 | 21.6k | xps_context_t *ctx = parser->ctx; |
193 | 21.6k | const char *atts[3]; |
194 | 21.6k | int i; |
195 | | |
196 | 21.6k | if (parser->error) |
197 | 0 | return; |
198 | | |
199 | 53.1k | for (i = 0; i < len; i++) |
200 | 31.9k | { |
201 | 31.9k | if (!is_xml_space(buf[i])) |
202 | 557 | { |
203 | 557 | char *tmp = xps_alloc(ctx, (size_t)len + 1); |
204 | 557 | if (!tmp) |
205 | 0 | { |
206 | 0 | parser->error = "out of memory"; |
207 | 0 | gs_throw(gs_error_VMerror, "out of memory.\n"); |
208 | 0 | return; |
209 | 0 | } |
210 | | |
211 | 557 | atts[0] = ""; |
212 | 557 | atts[1] = tmp; |
213 | 557 | atts[2] = NULL; |
214 | | |
215 | 557 | memcpy(tmp, buf, len); |
216 | 557 | tmp[len] = 0; |
217 | 557 | on_open_tag(zp, "", atts); |
218 | 557 | on_close_tag(zp, ""); |
219 | 557 | xps_free(ctx, tmp); |
220 | 557 | return; |
221 | 557 | } |
222 | 31.9k | } |
223 | 21.6k | } |
224 | | |
225 | | xps_item_t * |
226 | | xps_parse_xml(xps_context_t *ctx, byte *buf, int len) |
227 | 20 | { |
228 | 20 | xps_parser_t parser; |
229 | 20 | XML_Parser xp; |
230 | 20 | int code; |
231 | | |
232 | 20 | parser.ctx = ctx; |
233 | 20 | parser.root = NULL; |
234 | 20 | parser.head = NULL; |
235 | 20 | parser.error = NULL; |
236 | | |
237 | 20 | xp = XML_ParserCreateNS(NULL, ' '); |
238 | 20 | if (!xp) |
239 | 0 | { |
240 | 0 | gs_throw(-1, "xml error: could not create expat parser"); |
241 | 0 | return NULL; |
242 | 0 | } |
243 | | |
244 | 20 | XML_SetUserData(xp, &parser); |
245 | 20 | XML_SetParamEntityParsing(xp, XML_PARAM_ENTITY_PARSING_NEVER); |
246 | 20 | XML_SetStartElementHandler(xp, (XML_StartElementHandler)on_open_tag); |
247 | 20 | XML_SetEndElementHandler(xp, (XML_EndElementHandler)on_close_tag); |
248 | 20 | XML_SetCharacterDataHandler(xp, (XML_CharacterDataHandler)on_text); |
249 | | |
250 | 20 | code = XML_Parse(xp, (char*)buf, len, 1); |
251 | 20 | if (code == 0 || parser.error != NULL) |
252 | 17 | { |
253 | 17 | if (parser.root) |
254 | 17 | xps_free_item(ctx, parser.root); |
255 | 17 | if (XML_ErrorString(XML_GetErrorCode(xp)) != 0) |
256 | 17 | emprintf1(parser.ctx->memory, "XML_Error: %s\n", XML_ErrorString(XML_GetErrorCode(xp))); |
257 | 17 | XML_ParserFree(xp); |
258 | 17 | gs_throw1(-1, "parser error: %s", parser.error); |
259 | 17 | return NULL; |
260 | 17 | } |
261 | | |
262 | 3 | XML_ParserFree(xp); |
263 | | |
264 | 3 | return parser.root; |
265 | 20 | } |
266 | | |
/* Return the sibling that follows item, or NULL if item is the last one. */
xps_item_t *
xps_next(xps_item_t *item)
{
    return item->next;
}
272 | | |
/* Return the first child of item, or NULL if it has no children. */
xps_item_t *
xps_down(xps_item_t *item)
{
    return item->down;
}
278 | | |
/*
 * Return the element name of item with any recognised namespace
 * removed.  Text nodes have the empty string as their name.
 */
char *
xps_tag(xps_item_t *item)
{
    return item->name;
}
284 | | |
285 | | char * |
286 | | xps_att(xps_item_t *item, const char *att) |
287 | 57.3k | { |
288 | 57.3k | int i; |
289 | 163k | for (i = 0; item->atts[i]; i += 2) |
290 | 115k | if (!strcmp(item->atts[i], att)) |
291 | 8.85k | return item->atts[i + 1]; |
292 | 48.4k | return NULL; |
293 | 57.3k | } |
294 | | |
295 | | void |
296 | | xps_detach_and_free_remainder(xps_context_t *ctx, xps_item_t *root, xps_item_t *item) |
297 | 0 | { |
298 | 0 | if (item->up) |
299 | 0 | item->up->down = NULL; |
300 | |
|
301 | 0 | xps_free_item(ctx, item->next); |
302 | 0 | item->next = NULL; |
303 | |
|
304 | 0 | xps_free_item(ctx, root); |
305 | 0 | } |
306 | | |
307 | | void |
308 | | xps_free_item(xps_context_t *ctx, xps_item_t *item) |
309 | 60 | { |
310 | 60 | xps_item_t *next; |
311 | 11.5k | while (item) |
312 | 11.5k | { |
313 | 11.5k | next = item->next; |
314 | 11.5k | if (item->down) |
315 | 40 | xps_free_item(ctx, item->down); |
316 | 11.5k | xps_free(ctx, item); |
317 | 11.5k | item = next; |
318 | 11.5k | } |
319 | 60 | } |
320 | | |
/* Write n units of indentation to the debug output. */
static void indent(int n)
{
    for (; n != 0; n--)
        dlprintf(" ");
}
326 | | |
327 | | static void |
328 | | xps_debug_item_imp(xps_item_t *item, int level, int loop) |
329 | 0 | { |
330 | 0 | int i; |
331 | |
|
332 | 0 | while (item) |
333 | 0 | { |
334 | 0 | indent(level); |
335 | |
|
336 | 0 | if (strlen(item->name) == 0) |
337 | 0 | dlprintf1("%s\n", item->atts[1]); |
338 | 0 | else |
339 | 0 | { |
340 | 0 | dlprintf1("<%s", item->name); |
341 | |
|
342 | 0 | for (i = 0; item->atts[i]; i += 2) |
343 | 0 | dlprintf2(" %s=\"%s\"", item->atts[i], item->atts[i+1]); |
344 | |
|
345 | 0 | if (item->down) |
346 | 0 | { |
347 | 0 | dlprintf(">\n"); |
348 | 0 | xps_debug_item_imp(item->down, level + 1, 1); |
349 | 0 | indent(level); |
350 | 0 | dlprintf1("</%s>\n", item->name); |
351 | 0 | } |
352 | 0 | else |
353 | 0 | dlprintf(" />\n"); |
354 | 0 | } |
355 | |
|
356 | 0 | item = item->next; |
357 | |
|
358 | 0 | if (!loop) |
359 | 0 | return; |
360 | 0 | } |
361 | 0 | } |
362 | | |
363 | | void |
364 | | xps_debug_item(xps_item_t *item, int level) |
365 | 0 | { |
366 | 0 | xps_debug_item_imp(item, level, 0); |
367 | 0 | } |