/src/libxml2-2.9.7/HTMLtree.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * HTMLtree.c : implementation of access function for an HTML tree. |
3 | | * |
4 | | * See Copyright for the status of this software. |
5 | | * |
6 | | * daniel@veillard.com |
7 | | */ |
8 | | |
9 | | |
10 | | #define IN_LIBXML |
11 | | #include "libxml.h" |
12 | | #ifdef LIBXML_HTML_ENABLED |
13 | | |
14 | | #include <string.h> /* for memset() only ! */ |
15 | | |
16 | | #ifdef HAVE_CTYPE_H |
17 | | #include <ctype.h> |
18 | | #endif |
19 | | #ifdef HAVE_STDLIB_H |
20 | | #include <stdlib.h> |
21 | | #endif |
22 | | |
23 | | #include <libxml/xmlmemory.h> |
24 | | #include <libxml/HTMLparser.h> |
25 | | #include <libxml/HTMLtree.h> |
26 | | #include <libxml/entities.h> |
27 | | #include <libxml/valid.h> |
28 | | #include <libxml/xmlerror.h> |
29 | | #include <libxml/parserInternals.h> |
30 | | #include <libxml/globals.h> |
31 | | #include <libxml/uri.h> |
32 | | |
33 | | #include "buf.h" |
34 | | |
35 | | /************************************************************************ |
36 | | * * |
37 | | * Getting/Setting encoding meta tags * |
38 | | * * |
39 | | ************************************************************************/ |
40 | | |
41 | | /** |
42 | | * htmlGetMetaEncoding: |
43 | | * @doc: the document |
44 | | * |
45 | | * Encoding definition lookup in the Meta tags |
46 | | * |
47 | | * Returns the current encoding as flagged in the HTML source |
48 | | */ |
49 | | const xmlChar * |
50 | 0 | htmlGetMetaEncoding(htmlDocPtr doc) { |
51 | 0 | htmlNodePtr cur; |
52 | 0 | const xmlChar *content; |
53 | 0 | const xmlChar *encoding; |
54 | |
|
55 | 0 | if (doc == NULL) |
56 | 0 | return(NULL); |
57 | 0 | cur = doc->children; |
58 | | |
59 | | /* |
60 | | * Search the html |
61 | | */ |
62 | 0 | while (cur != NULL) { |
63 | 0 | if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { |
64 | 0 | if (xmlStrEqual(cur->name, BAD_CAST"html")) |
65 | 0 | break; |
66 | 0 | if (xmlStrEqual(cur->name, BAD_CAST"head")) |
67 | 0 | goto found_head; |
68 | 0 | if (xmlStrEqual(cur->name, BAD_CAST"meta")) |
69 | 0 | goto found_meta; |
70 | 0 | } |
71 | 0 | cur = cur->next; |
72 | 0 | } |
73 | 0 | if (cur == NULL) |
74 | 0 | return(NULL); |
75 | 0 | cur = cur->children; |
76 | | |
77 | | /* |
78 | | * Search the head |
79 | | */ |
80 | 0 | while (cur != NULL) { |
81 | 0 | if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { |
82 | 0 | if (xmlStrEqual(cur->name, BAD_CAST"head")) |
83 | 0 | break; |
84 | 0 | if (xmlStrEqual(cur->name, BAD_CAST"meta")) |
85 | 0 | goto found_meta; |
86 | 0 | } |
87 | 0 | cur = cur->next; |
88 | 0 | } |
89 | 0 | if (cur == NULL) |
90 | 0 | return(NULL); |
91 | 0 | found_head: |
92 | 0 | cur = cur->children; |
93 | | |
94 | | /* |
95 | | * Search the meta elements |
96 | | */ |
97 | 0 | found_meta: |
98 | 0 | while (cur != NULL) { |
99 | 0 | if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { |
100 | 0 | if (xmlStrEqual(cur->name, BAD_CAST"meta")) { |
101 | 0 | xmlAttrPtr attr = cur->properties; |
102 | 0 | int http; |
103 | 0 | const xmlChar *value; |
104 | |
|
105 | 0 | content = NULL; |
106 | 0 | http = 0; |
107 | 0 | while (attr != NULL) { |
108 | 0 | if ((attr->children != NULL) && |
109 | 0 | (attr->children->type == XML_TEXT_NODE) && |
110 | 0 | (attr->children->next == NULL)) { |
111 | 0 | value = attr->children->content; |
112 | 0 | if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv")) |
113 | 0 | && (!xmlStrcasecmp(value, BAD_CAST"Content-Type"))) |
114 | 0 | http = 1; |
115 | 0 | else if ((value != NULL) |
116 | 0 | && (!xmlStrcasecmp(attr->name, BAD_CAST"content"))) |
117 | 0 | content = value; |
118 | 0 | if ((http != 0) && (content != NULL)) |
119 | 0 | goto found_content; |
120 | 0 | } |
121 | 0 | attr = attr->next; |
122 | 0 | } |
123 | 0 | } |
124 | 0 | } |
125 | 0 | cur = cur->next; |
126 | 0 | } |
127 | 0 | return(NULL); |
128 | | |
129 | 0 | found_content: |
130 | 0 | encoding = xmlStrstr(content, BAD_CAST"charset="); |
131 | 0 | if (encoding == NULL) |
132 | 0 | encoding = xmlStrstr(content, BAD_CAST"Charset="); |
133 | 0 | if (encoding == NULL) |
134 | 0 | encoding = xmlStrstr(content, BAD_CAST"CHARSET="); |
135 | 0 | if (encoding != NULL) { |
136 | 0 | encoding += 8; |
137 | 0 | } else { |
138 | 0 | encoding = xmlStrstr(content, BAD_CAST"charset ="); |
139 | 0 | if (encoding == NULL) |
140 | 0 | encoding = xmlStrstr(content, BAD_CAST"Charset ="); |
141 | 0 | if (encoding == NULL) |
142 | 0 | encoding = xmlStrstr(content, BAD_CAST"CHARSET ="); |
143 | 0 | if (encoding != NULL) |
144 | 0 | encoding += 9; |
145 | 0 | } |
146 | 0 | if (encoding != NULL) { |
147 | 0 | while ((*encoding == ' ') || (*encoding == '\t')) encoding++; |
148 | 0 | } |
149 | 0 | return(encoding); |
150 | 0 | } |
151 | | |
152 | | /** |
153 | | * htmlSetMetaEncoding: |
154 | | * @doc: the document |
155 | | * @encoding: the encoding string |
156 | | * |
157 | | * Sets the current encoding in the Meta tags |
158 | | * NOTE: this will not change the document content encoding, just |
159 | | * the META flag associated. |
160 | | * |
161 | | * Returns 0 in case of success and -1 in case of error |
162 | | */ |
163 | | int |
164 | 0 | htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) { |
165 | 0 | htmlNodePtr cur, meta = NULL, head = NULL; |
166 | 0 | const xmlChar *content = NULL; |
167 | 0 | char newcontent[100]; |
168 | |
|
169 | 0 | newcontent[0] = 0; |
170 | |
|
171 | 0 | if (doc == NULL) |
172 | 0 | return(-1); |
173 | | |
174 | | /* html isn't a real encoding it's just libxml2 way to get entities */ |
175 | 0 | if (!xmlStrcasecmp(encoding, BAD_CAST "html")) |
176 | 0 | return(-1); |
177 | | |
178 | 0 | if (encoding != NULL) { |
179 | 0 | snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s", |
180 | 0 | (char *)encoding); |
181 | 0 | newcontent[sizeof(newcontent) - 1] = 0; |
182 | 0 | } |
183 | |
|
184 | 0 | cur = doc->children; |
185 | | |
186 | | /* |
187 | | * Search the html |
188 | | */ |
189 | 0 | while (cur != NULL) { |
190 | 0 | if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { |
191 | 0 | if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0) |
192 | 0 | break; |
193 | 0 | if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0) |
194 | 0 | goto found_head; |
195 | 0 | if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) |
196 | 0 | goto found_meta; |
197 | 0 | } |
198 | 0 | cur = cur->next; |
199 | 0 | } |
200 | 0 | if (cur == NULL) |
201 | 0 | return(-1); |
202 | 0 | cur = cur->children; |
203 | | |
204 | | /* |
205 | | * Search the head |
206 | | */ |
207 | 0 | while (cur != NULL) { |
208 | 0 | if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { |
209 | 0 | if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0) |
210 | 0 | break; |
211 | 0 | if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) { |
212 | 0 | head = cur->parent; |
213 | 0 | goto found_meta; |
214 | 0 | } |
215 | 0 | } |
216 | 0 | cur = cur->next; |
217 | 0 | } |
218 | 0 | if (cur == NULL) |
219 | 0 | return(-1); |
220 | 0 | found_head: |
221 | 0 | head = cur; |
222 | 0 | if (cur->children == NULL) |
223 | 0 | goto create; |
224 | 0 | cur = cur->children; |
225 | |
|
226 | 0 | found_meta: |
227 | | /* |
228 | | * Search and update all the remaining the meta elements carrying |
229 | | * encoding informations |
230 | | */ |
231 | 0 | while (cur != NULL) { |
232 | 0 | if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { |
233 | 0 | if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) { |
234 | 0 | xmlAttrPtr attr = cur->properties; |
235 | 0 | int http; |
236 | 0 | const xmlChar *value; |
237 | |
|
238 | 0 | content = NULL; |
239 | 0 | http = 0; |
240 | 0 | while (attr != NULL) { |
241 | 0 | if ((attr->children != NULL) && |
242 | 0 | (attr->children->type == XML_TEXT_NODE) && |
243 | 0 | (attr->children->next == NULL)) { |
244 | 0 | value = attr->children->content; |
245 | 0 | if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv")) |
246 | 0 | && (!xmlStrcasecmp(value, BAD_CAST"Content-Type"))) |
247 | 0 | http = 1; |
248 | 0 | else |
249 | 0 | { |
250 | 0 | if ((value != NULL) && |
251 | 0 | (!xmlStrcasecmp(attr->name, BAD_CAST"content"))) |
252 | 0 | content = value; |
253 | 0 | } |
254 | 0 | if ((http != 0) && (content != NULL)) |
255 | 0 | break; |
256 | 0 | } |
257 | 0 | attr = attr->next; |
258 | 0 | } |
259 | 0 | if ((http != 0) && (content != NULL)) { |
260 | 0 | meta = cur; |
261 | 0 | break; |
262 | 0 | } |
263 | |
|
264 | 0 | } |
265 | 0 | } |
266 | 0 | cur = cur->next; |
267 | 0 | } |
268 | 0 | create: |
269 | 0 | if (meta == NULL) { |
270 | 0 | if ((encoding != NULL) && (head != NULL)) { |
271 | | /* |
272 | | * Create a new Meta element with the right attributes |
273 | | */ |
274 | |
|
275 | 0 | meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL); |
276 | 0 | if (head->children == NULL) |
277 | 0 | xmlAddChild(head, meta); |
278 | 0 | else |
279 | 0 | xmlAddPrevSibling(head->children, meta); |
280 | 0 | xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type"); |
281 | 0 | xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent); |
282 | 0 | } |
283 | 0 | } else { |
284 | | /* remove the meta tag if NULL is passed */ |
285 | 0 | if (encoding == NULL) { |
286 | 0 | xmlUnlinkNode(meta); |
287 | 0 | xmlFreeNode(meta); |
288 | 0 | } |
289 | | /* change the document only if there is a real encoding change */ |
290 | 0 | else if (xmlStrcasestr(content, encoding) == NULL) { |
291 | 0 | xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent); |
292 | 0 | } |
293 | 0 | } |
294 | | |
295 | |
|
296 | 0 | return(0); |
297 | 0 | } |
298 | | |
/**
 * htmlBooleanAttrs:
 *
 * NULL terminated list of the HTML attributes which are output in
 * minimized form, i.e. <option selected="selected"> will be output as
 * <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
 *
 */
static const char* htmlBooleanAttrs[] = {
    "checked",
    "compact",
    "declare",
    "defer",
    "disabled",
    "ismap",
    "multiple",
    "nohref",
    "noresize",
    "noshade",
    "nowrap",
    "readonly",
    "selected",
    NULL
};
312 | | |
313 | | |
314 | | /** |
315 | | * htmlIsBooleanAttr: |
316 | | * @name: the name of the attribute to check |
317 | | * |
318 | | * Determine if a given attribute is a boolean attribute. |
319 | | * |
320 | | * returns: false if the attribute is not boolean, true otherwise. |
321 | | */ |
322 | | int |
323 | | htmlIsBooleanAttr(const xmlChar *name) |
324 | 0 | { |
325 | 0 | int i = 0; |
326 | |
|
327 | 0 | while (htmlBooleanAttrs[i] != NULL) { |
328 | 0 | if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0) |
329 | 0 | return 1; |
330 | 0 | i++; |
331 | 0 | } |
332 | 0 | return 0; |
333 | 0 | } |
334 | | |
335 | | #ifdef LIBXML_OUTPUT_ENABLED |
336 | | /* |
337 | | * private routine exported from xmlIO.c |
338 | | */ |
339 | | xmlOutputBufferPtr |
340 | | xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder); |
341 | | /************************************************************************ |
342 | | * * |
343 | | * Output error handlers * |
344 | | * * |
345 | | ************************************************************************/ |
346 | | /** |
347 | | * htmlSaveErrMemory: |
348 | | * @extra: extra informations |
349 | | * |
350 | | * Handle an out of memory condition |
351 | | */ |
352 | | static void |
353 | | htmlSaveErrMemory(const char *extra) |
354 | 0 | { |
355 | 0 | __xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra); |
356 | 0 | } |
357 | | |
358 | | /** |
359 | | * htmlSaveErr: |
360 | | * @code: the error number |
361 | | * @node: the location of the error. |
362 | | * @extra: extra informations |
363 | | * |
364 | | * Handle an out of memory condition |
365 | | */ |
366 | | static void |
367 | | htmlSaveErr(int code, xmlNodePtr node, const char *extra) |
368 | 0 | { |
369 | 0 | const char *msg = NULL; |
370 | |
|
371 | 0 | switch(code) { |
372 | 0 | case XML_SAVE_NOT_UTF8: |
373 | 0 | msg = "string is not in UTF-8\n"; |
374 | 0 | break; |
375 | 0 | case XML_SAVE_CHAR_INVALID: |
376 | 0 | msg = "invalid character value\n"; |
377 | 0 | break; |
378 | 0 | case XML_SAVE_UNKNOWN_ENCODING: |
379 | 0 | msg = "unknown encoding %s\n"; |
380 | 0 | break; |
381 | 0 | case XML_SAVE_NO_DOCTYPE: |
382 | 0 | msg = "HTML has no DOCTYPE\n"; |
383 | 0 | break; |
384 | 0 | default: |
385 | 0 | msg = "unexpected error number\n"; |
386 | 0 | } |
387 | 0 | __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra); |
388 | 0 | } |
389 | | |
390 | | /************************************************************************ |
391 | | * * |
392 | | * Dumping HTML tree content to a simple buffer * |
393 | | * * |
394 | | ************************************************************************/ |
395 | | |
396 | | /** |
397 | | * htmlBufNodeDumpFormat: |
398 | | * @buf: the xmlBufPtr output |
399 | | * @doc: the document |
400 | | * @cur: the current node |
401 | | * @format: should formatting spaces been added |
402 | | * |
403 | | * Dump an HTML node, recursive behaviour,children are printed too. |
404 | | * |
405 | | * Returns the number of byte written or -1 in case of error |
406 | | */ |
407 | | static size_t |
408 | | htmlBufNodeDumpFormat(xmlBufPtr buf, xmlDocPtr doc, xmlNodePtr cur, |
409 | 0 | int format) { |
410 | 0 | size_t use; |
411 | 0 | int ret; |
412 | 0 | xmlOutputBufferPtr outbuf; |
413 | |
|
414 | 0 | if (cur == NULL) { |
415 | 0 | return (-1); |
416 | 0 | } |
417 | 0 | if (buf == NULL) { |
418 | 0 | return (-1); |
419 | 0 | } |
420 | 0 | outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer)); |
421 | 0 | if (outbuf == NULL) { |
422 | 0 | htmlSaveErrMemory("allocating HTML output buffer"); |
423 | 0 | return (-1); |
424 | 0 | } |
425 | 0 | memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer)); |
426 | 0 | outbuf->buffer = buf; |
427 | 0 | outbuf->encoder = NULL; |
428 | 0 | outbuf->writecallback = NULL; |
429 | 0 | outbuf->closecallback = NULL; |
430 | 0 | outbuf->context = NULL; |
431 | 0 | outbuf->written = 0; |
432 | |
|
433 | 0 | use = xmlBufUse(buf); |
434 | 0 | htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format); |
435 | 0 | xmlFree(outbuf); |
436 | 0 | ret = xmlBufUse(buf) - use; |
437 | 0 | return (ret); |
438 | 0 | } |
439 | | |
440 | | /** |
441 | | * htmlNodeDump: |
442 | | * @buf: the HTML buffer output |
443 | | * @doc: the document |
444 | | * @cur: the current node |
445 | | * |
446 | | * Dump an HTML node, recursive behaviour,children are printed too, |
447 | | * and formatting returns are added. |
448 | | * |
449 | | * Returns the number of byte written or -1 in case of error |
450 | | */ |
451 | | int |
452 | 0 | htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) { |
453 | 0 | xmlBufPtr buffer; |
454 | 0 | size_t ret; |
455 | |
|
456 | 0 | if ((buf == NULL) || (cur == NULL)) |
457 | 0 | return(-1); |
458 | | |
459 | 0 | xmlInitParser(); |
460 | 0 | buffer = xmlBufFromBuffer(buf); |
461 | 0 | if (buffer == NULL) |
462 | 0 | return(-1); |
463 | | |
464 | 0 | ret = htmlBufNodeDumpFormat(buffer, doc, cur, 1); |
465 | |
|
466 | 0 | xmlBufBackToBuffer(buffer); |
467 | |
|
468 | 0 | if (ret > INT_MAX) |
469 | 0 | return(-1); |
470 | 0 | return((int) ret); |
471 | 0 | } |
472 | | |
473 | | /** |
474 | | * htmlNodeDumpFileFormat: |
475 | | * @out: the FILE pointer |
476 | | * @doc: the document |
477 | | * @cur: the current node |
478 | | * @encoding: the document encoding |
479 | | * @format: should formatting spaces been added |
480 | | * |
481 | | * Dump an HTML node, recursive behaviour,children are printed too. |
482 | | * |
483 | | * TODO: if encoding == NULL try to save in the doc encoding |
484 | | * |
485 | | * returns: the number of byte written or -1 in case of failure. |
486 | | */ |
487 | | int |
488 | | htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc, |
489 | 0 | xmlNodePtr cur, const char *encoding, int format) { |
490 | 0 | xmlOutputBufferPtr buf; |
491 | 0 | xmlCharEncodingHandlerPtr handler = NULL; |
492 | 0 | int ret; |
493 | |
|
494 | 0 | xmlInitParser(); |
495 | |
|
496 | 0 | if (encoding != NULL) { |
497 | 0 | xmlCharEncoding enc; |
498 | |
|
499 | 0 | enc = xmlParseCharEncoding(encoding); |
500 | 0 | if (enc != XML_CHAR_ENCODING_UTF8) { |
501 | 0 | handler = xmlFindCharEncodingHandler(encoding); |
502 | 0 | if (handler == NULL) |
503 | 0 | htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); |
504 | 0 | } |
505 | 0 | } |
506 | | |
507 | | /* |
508 | | * Fallback to HTML or ASCII when the encoding is unspecified |
509 | | */ |
510 | 0 | if (handler == NULL) |
511 | 0 | handler = xmlFindCharEncodingHandler("HTML"); |
512 | 0 | if (handler == NULL) |
513 | 0 | handler = xmlFindCharEncodingHandler("ascii"); |
514 | | |
515 | | /* |
516 | | * save the content to a temp buffer. |
517 | | */ |
518 | 0 | buf = xmlOutputBufferCreateFile(out, handler); |
519 | 0 | if (buf == NULL) return(0); |
520 | | |
521 | 0 | htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format); |
522 | |
|
523 | 0 | ret = xmlOutputBufferClose(buf); |
524 | 0 | return(ret); |
525 | 0 | } |
526 | | |
527 | | /** |
528 | | * htmlNodeDumpFile: |
529 | | * @out: the FILE pointer |
530 | | * @doc: the document |
531 | | * @cur: the current node |
532 | | * |
533 | | * Dump an HTML node, recursive behaviour,children are printed too, |
534 | | * and formatting returns are added. |
535 | | */ |
536 | | void |
537 | 0 | htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) { |
538 | 0 | htmlNodeDumpFileFormat(out, doc, cur, NULL, 1); |
539 | 0 | } |
540 | | |
541 | | /** |
542 | | * htmlDocDumpMemoryFormat: |
543 | | * @cur: the document |
544 | | * @mem: OUT: the memory pointer |
545 | | * @size: OUT: the memory length |
546 | | * @format: should formatting spaces been added |
547 | | * |
548 | | * Dump an HTML document in memory and return the xmlChar * and it's size. |
549 | | * It's up to the caller to free the memory. |
550 | | */ |
551 | | void |
552 | 0 | htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) { |
553 | 0 | xmlOutputBufferPtr buf; |
554 | 0 | xmlCharEncodingHandlerPtr handler = NULL; |
555 | 0 | const char *encoding; |
556 | |
|
557 | 0 | xmlInitParser(); |
558 | |
|
559 | 0 | if ((mem == NULL) || (size == NULL)) |
560 | 0 | return; |
561 | 0 | if (cur == NULL) { |
562 | 0 | *mem = NULL; |
563 | 0 | *size = 0; |
564 | 0 | return; |
565 | 0 | } |
566 | | |
567 | 0 | encoding = (const char *) htmlGetMetaEncoding(cur); |
568 | |
|
569 | 0 | if (encoding != NULL) { |
570 | 0 | xmlCharEncoding enc; |
571 | |
|
572 | 0 | enc = xmlParseCharEncoding(encoding); |
573 | 0 | if (enc != cur->charset) { |
574 | 0 | if (cur->charset != XML_CHAR_ENCODING_UTF8) { |
575 | | /* |
576 | | * Not supported yet |
577 | | */ |
578 | 0 | *mem = NULL; |
579 | 0 | *size = 0; |
580 | 0 | return; |
581 | 0 | } |
582 | | |
583 | 0 | handler = xmlFindCharEncodingHandler(encoding); |
584 | 0 | if (handler == NULL) |
585 | 0 | htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); |
586 | |
|
587 | 0 | } else { |
588 | 0 | handler = xmlFindCharEncodingHandler(encoding); |
589 | 0 | } |
590 | 0 | } |
591 | | |
592 | | /* |
593 | | * Fallback to HTML or ASCII when the encoding is unspecified |
594 | | */ |
595 | 0 | if (handler == NULL) |
596 | 0 | handler = xmlFindCharEncodingHandler("HTML"); |
597 | 0 | if (handler == NULL) |
598 | 0 | handler = xmlFindCharEncodingHandler("ascii"); |
599 | |
|
600 | 0 | buf = xmlAllocOutputBufferInternal(handler); |
601 | 0 | if (buf == NULL) { |
602 | 0 | *mem = NULL; |
603 | 0 | *size = 0; |
604 | 0 | return; |
605 | 0 | } |
606 | | |
607 | 0 | htmlDocContentDumpFormatOutput(buf, cur, NULL, format); |
608 | |
|
609 | 0 | xmlOutputBufferFlush(buf); |
610 | 0 | if (buf->conv != NULL) { |
611 | 0 | *size = xmlBufUse(buf->conv); |
612 | 0 | *mem = xmlStrndup(xmlBufContent(buf->conv), *size); |
613 | 0 | } else { |
614 | 0 | *size = xmlBufUse(buf->buffer); |
615 | 0 | *mem = xmlStrndup(xmlBufContent(buf->buffer), *size); |
616 | 0 | } |
617 | 0 | (void)xmlOutputBufferClose(buf); |
618 | 0 | } |
619 | | |
620 | | /** |
621 | | * htmlDocDumpMemory: |
622 | | * @cur: the document |
623 | | * @mem: OUT: the memory pointer |
624 | | * @size: OUT: the memory length |
625 | | * |
626 | | * Dump an HTML document in memory and return the xmlChar * and it's size. |
627 | | * It's up to the caller to free the memory. |
628 | | */ |
629 | | void |
630 | 0 | htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) { |
631 | 0 | htmlDocDumpMemoryFormat(cur, mem, size, 1); |
632 | 0 | } |
633 | | |
634 | | |
635 | | /************************************************************************ |
636 | | * * |
637 | | * Dumping HTML tree content to an I/O output buffer * |
638 | | * * |
639 | | ************************************************************************/ |
640 | | |
641 | | void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur); |
642 | | |
643 | | /** |
644 | | * htmlDtdDumpOutput: |
645 | | * @buf: the HTML buffer output |
646 | | * @doc: the document |
647 | | * @encoding: the encoding string |
648 | | * |
649 | | * TODO: check whether encoding is needed |
650 | | * |
651 | | * Dump the HTML document DTD, if any. |
652 | | */ |
653 | | static void |
654 | | htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, |
655 | 0 | const char *encoding ATTRIBUTE_UNUSED) { |
656 | 0 | xmlDtdPtr cur = doc->intSubset; |
657 | |
|
658 | 0 | if (cur == NULL) { |
659 | 0 | htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL); |
660 | 0 | return; |
661 | 0 | } |
662 | 0 | xmlOutputBufferWriteString(buf, "<!DOCTYPE "); |
663 | 0 | xmlOutputBufferWriteString(buf, (const char *)cur->name); |
664 | 0 | if (cur->ExternalID != NULL) { |
665 | 0 | xmlOutputBufferWriteString(buf, " PUBLIC "); |
666 | 0 | xmlBufWriteQuotedString(buf->buffer, cur->ExternalID); |
667 | 0 | if (cur->SystemID != NULL) { |
668 | 0 | xmlOutputBufferWriteString(buf, " "); |
669 | 0 | xmlBufWriteQuotedString(buf->buffer, cur->SystemID); |
670 | 0 | } |
671 | 0 | } else if (cur->SystemID != NULL && |
672 | 0 | xmlStrcmp(cur->SystemID, BAD_CAST "about:legacy-compat")) { |
673 | 0 | xmlOutputBufferWriteString(buf, " SYSTEM "); |
674 | 0 | xmlBufWriteQuotedString(buf->buffer, cur->SystemID); |
675 | 0 | } |
676 | 0 | xmlOutputBufferWriteString(buf, ">\n"); |
677 | 0 | } |
678 | | |
679 | | /** |
680 | | * htmlAttrDumpOutput: |
681 | | * @buf: the HTML buffer output |
682 | | * @doc: the document |
683 | | * @cur: the attribute pointer |
684 | | * @encoding: the encoding string |
685 | | * |
686 | | * Dump an HTML attribute |
687 | | */ |
688 | | static void |
689 | | htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, |
690 | 0 | const char *encoding ATTRIBUTE_UNUSED) { |
691 | 0 | xmlChar *value; |
692 | | |
693 | | /* |
694 | | * The html output method should not escape a & character |
695 | | * occurring in an attribute value immediately followed by |
696 | | * a { character (see Section B.7.1 of the HTML 4.0 Recommendation). |
697 | | * This is implemented in xmlEncodeEntitiesReentrant |
698 | | */ |
699 | |
|
700 | 0 | if (cur == NULL) { |
701 | 0 | return; |
702 | 0 | } |
703 | 0 | xmlOutputBufferWriteString(buf, " "); |
704 | 0 | if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { |
705 | 0 | xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); |
706 | 0 | xmlOutputBufferWriteString(buf, ":"); |
707 | 0 | } |
708 | 0 | xmlOutputBufferWriteString(buf, (const char *)cur->name); |
709 | 0 | if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) { |
710 | 0 | value = xmlNodeListGetString(doc, cur->children, 0); |
711 | 0 | if (value) { |
712 | 0 | xmlOutputBufferWriteString(buf, "="); |
713 | 0 | if ((cur->ns == NULL) && (cur->parent != NULL) && |
714 | 0 | (cur->parent->ns == NULL) && |
715 | 0 | ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) || |
716 | 0 | (!xmlStrcasecmp(cur->name, BAD_CAST "action")) || |
717 | 0 | (!xmlStrcasecmp(cur->name, BAD_CAST "src")) || |
718 | 0 | ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) && |
719 | 0 | (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) { |
720 | 0 | xmlChar *tmp = value; |
721 | | /* xmlURIEscapeStr() escapes '"' so it can be safely used. */ |
722 | 0 | xmlBufCCat(buf->buffer, "\""); |
723 | |
|
724 | 0 | while (IS_BLANK_CH(*tmp)) tmp++; |
725 | | |
726 | | /* URI Escape everything, except server side includes. */ |
727 | 0 | for ( ; ; ) { |
728 | 0 | xmlChar *escaped; |
729 | 0 | xmlChar endChar; |
730 | 0 | xmlChar *end = NULL; |
731 | 0 | xmlChar *start = (xmlChar *)xmlStrstr(tmp, BAD_CAST "<!--"); |
732 | 0 | if (start != NULL) { |
733 | 0 | end = (xmlChar *)xmlStrstr(tmp, BAD_CAST "-->"); |
734 | 0 | if (end != NULL) { |
735 | 0 | *start = '\0'; |
736 | 0 | } |
737 | 0 | } |
738 | | |
739 | | /* Escape the whole string, or until start (set to '\0'). */ |
740 | 0 | escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+"); |
741 | 0 | if (escaped != NULL) { |
742 | 0 | xmlBufCat(buf->buffer, escaped); |
743 | 0 | xmlFree(escaped); |
744 | 0 | } else { |
745 | 0 | xmlBufCat(buf->buffer, tmp); |
746 | 0 | } |
747 | |
|
748 | 0 | if (end == NULL) { /* Everything has been written. */ |
749 | 0 | break; |
750 | 0 | } |
751 | | |
752 | | /* Do not escape anything within server side includes. */ |
753 | 0 | *start = '<'; /* Restore the first character of "<!--". */ |
754 | 0 | end += 3; /* strlen("-->") */ |
755 | 0 | endChar = *end; |
756 | 0 | *end = '\0'; |
757 | 0 | xmlBufCat(buf->buffer, start); |
758 | 0 | *end = endChar; |
759 | 0 | tmp = end; |
760 | 0 | } |
761 | |
|
762 | 0 | xmlBufCCat(buf->buffer, "\""); |
763 | 0 | } else { |
764 | 0 | xmlBufWriteQuotedString(buf->buffer, value); |
765 | 0 | } |
766 | 0 | xmlFree(value); |
767 | 0 | } else { |
768 | 0 | xmlOutputBufferWriteString(buf, "=\"\""); |
769 | 0 | } |
770 | 0 | } |
771 | 0 | } |
772 | | |
773 | | /** |
774 | | * htmlAttrListDumpOutput: |
775 | | * @buf: the HTML buffer output |
776 | | * @doc: the document |
777 | | * @cur: the first attribute pointer |
778 | | * @encoding: the encoding string |
779 | | * |
780 | | * Dump a list of HTML attributes |
781 | | */ |
782 | | static void |
783 | 0 | htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) { |
784 | 0 | if (cur == NULL) { |
785 | 0 | return; |
786 | 0 | } |
787 | 0 | while (cur != NULL) { |
788 | 0 | htmlAttrDumpOutput(buf, doc, cur, encoding); |
789 | 0 | cur = cur->next; |
790 | 0 | } |
791 | 0 | } |
792 | | |
793 | | |
794 | | |
795 | | /** |
796 | | * htmlNodeListDumpOutput: |
797 | | * @buf: the HTML buffer output |
798 | | * @doc: the document |
799 | | * @cur: the first node |
800 | | * @encoding: the encoding string |
801 | | * @format: should formatting spaces been added |
802 | | * |
803 | | * Dump an HTML node list, recursive behaviour,children are printed too. |
804 | | */ |
805 | | static void |
806 | | htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, |
807 | 0 | xmlNodePtr cur, const char *encoding, int format) { |
808 | 0 | if (cur == NULL) { |
809 | 0 | return; |
810 | 0 | } |
811 | 0 | while (cur != NULL) { |
812 | 0 | htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format); |
813 | 0 | cur = cur->next; |
814 | 0 | } |
815 | 0 | } |
816 | | |
817 | | /** |
818 | | * htmlNodeDumpFormatOutput: |
819 | | * @buf: the HTML buffer output |
820 | | * @doc: the document |
821 | | * @cur: the current node |
822 | | * @encoding: the encoding string |
823 | | * @format: should formatting spaces been added |
824 | | * |
825 | | * Dump an HTML node, recursive behaviour,children are printed too. |
826 | | */ |
827 | | void |
828 | | htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, |
829 | 0 | xmlNodePtr cur, const char *encoding, int format) { |
830 | 0 | const htmlElemDesc * info; |
831 | |
|
832 | 0 | xmlInitParser(); |
833 | |
|
834 | 0 | if ((cur == NULL) || (buf == NULL)) { |
835 | 0 | return; |
836 | 0 | } |
837 | | /* |
838 | | * Special cases. |
839 | | */ |
840 | 0 | if (cur->type == XML_DTD_NODE) |
841 | 0 | return; |
842 | 0 | if ((cur->type == XML_HTML_DOCUMENT_NODE) || |
843 | 0 | (cur->type == XML_DOCUMENT_NODE)){ |
844 | 0 | htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding); |
845 | 0 | return; |
846 | 0 | } |
847 | 0 | if (cur->type == XML_ATTRIBUTE_NODE) { |
848 | 0 | htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur, encoding); |
849 | 0 | return; |
850 | 0 | } |
851 | 0 | if (cur->type == HTML_TEXT_NODE) { |
852 | 0 | if (cur->content != NULL) { |
853 | 0 | if (((cur->name == (const xmlChar *)xmlStringText) || |
854 | 0 | (cur->name != (const xmlChar *)xmlStringTextNoenc)) && |
855 | 0 | ((cur->parent == NULL) || |
856 | 0 | ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) && |
857 | 0 | (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) { |
858 | 0 | xmlChar *buffer; |
859 | |
|
860 | 0 | buffer = xmlEncodeEntitiesReentrant(doc, cur->content); |
861 | 0 | if (buffer != NULL) { |
862 | 0 | xmlOutputBufferWriteString(buf, (const char *)buffer); |
863 | 0 | xmlFree(buffer); |
864 | 0 | } |
865 | 0 | } else { |
866 | 0 | xmlOutputBufferWriteString(buf, (const char *)cur->content); |
867 | 0 | } |
868 | 0 | } |
869 | 0 | return; |
870 | 0 | } |
871 | 0 | if (cur->type == HTML_COMMENT_NODE) { |
872 | 0 | if (cur->content != NULL) { |
873 | 0 | xmlOutputBufferWriteString(buf, "<!--"); |
874 | 0 | xmlOutputBufferWriteString(buf, (const char *)cur->content); |
875 | 0 | xmlOutputBufferWriteString(buf, "-->"); |
876 | 0 | } |
877 | 0 | return; |
878 | 0 | } |
879 | 0 | if (cur->type == HTML_PI_NODE) { |
880 | 0 | if (cur->name == NULL) |
881 | 0 | return; |
882 | 0 | xmlOutputBufferWriteString(buf, "<?"); |
883 | 0 | xmlOutputBufferWriteString(buf, (const char *)cur->name); |
884 | 0 | if (cur->content != NULL) { |
885 | 0 | xmlOutputBufferWriteString(buf, " "); |
886 | 0 | xmlOutputBufferWriteString(buf, (const char *)cur->content); |
887 | 0 | } |
888 | 0 | xmlOutputBufferWriteString(buf, ">"); |
889 | 0 | return; |
890 | 0 | } |
891 | 0 | if (cur->type == HTML_ENTITY_REF_NODE) { |
892 | 0 | xmlOutputBufferWriteString(buf, "&"); |
893 | 0 | xmlOutputBufferWriteString(buf, (const char *)cur->name); |
894 | 0 | xmlOutputBufferWriteString(buf, ";"); |
895 | 0 | return; |
896 | 0 | } |
897 | 0 | if (cur->type == HTML_PRESERVE_NODE) { |
898 | 0 | if (cur->content != NULL) { |
899 | 0 | xmlOutputBufferWriteString(buf, (const char *)cur->content); |
900 | 0 | } |
901 | 0 | return; |
902 | 0 | } |
903 | | |
904 | | /* |
905 | | * Get specific HTML info for that node. |
906 | | */ |
907 | 0 | if (cur->ns == NULL) |
908 | 0 | info = htmlTagLookup(cur->name); |
909 | 0 | else |
910 | 0 | info = NULL; |
911 | |
|
912 | 0 | xmlOutputBufferWriteString(buf, "<"); |
913 | 0 | if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { |
914 | 0 | xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); |
915 | 0 | xmlOutputBufferWriteString(buf, ":"); |
916 | 0 | } |
917 | 0 | xmlOutputBufferWriteString(buf, (const char *)cur->name); |
918 | 0 | if (cur->nsDef) |
919 | 0 | xmlNsListDumpOutput(buf, cur->nsDef); |
920 | 0 | if (cur->properties != NULL) |
921 | 0 | htmlAttrListDumpOutput(buf, doc, cur->properties, encoding); |
922 | |
|
923 | 0 | if ((info != NULL) && (info->empty)) { |
924 | 0 | xmlOutputBufferWriteString(buf, ">"); |
925 | 0 | if ((format) && (!info->isinline) && (cur->next != NULL)) { |
926 | 0 | if ((cur->next->type != HTML_TEXT_NODE) && |
927 | 0 | (cur->next->type != HTML_ENTITY_REF_NODE) && |
928 | 0 | (cur->parent != NULL) && |
929 | 0 | (cur->parent->name != NULL) && |
930 | 0 | (cur->parent->name[0] != 'p')) /* p, pre, param */ |
931 | 0 | xmlOutputBufferWriteString(buf, "\n"); |
932 | 0 | } |
933 | 0 | return; |
934 | 0 | } |
935 | 0 | if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) && |
936 | 0 | (cur->children == NULL)) { |
937 | 0 | if ((info != NULL) && (info->saveEndTag != 0) && |
938 | 0 | (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) && |
939 | 0 | (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) { |
940 | 0 | xmlOutputBufferWriteString(buf, ">"); |
941 | 0 | } else { |
942 | 0 | xmlOutputBufferWriteString(buf, "></"); |
943 | 0 | if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { |
944 | 0 | xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); |
945 | 0 | xmlOutputBufferWriteString(buf, ":"); |
946 | 0 | } |
947 | 0 | xmlOutputBufferWriteString(buf, (const char *)cur->name); |
948 | 0 | xmlOutputBufferWriteString(buf, ">"); |
949 | 0 | } |
950 | 0 | if ((format) && (cur->next != NULL) && |
951 | 0 | (info != NULL) && (!info->isinline)) { |
952 | 0 | if ((cur->next->type != HTML_TEXT_NODE) && |
953 | 0 | (cur->next->type != HTML_ENTITY_REF_NODE) && |
954 | 0 | (cur->parent != NULL) && |
955 | 0 | (cur->parent->name != NULL) && |
956 | 0 | (cur->parent->name[0] != 'p')) /* p, pre, param */ |
957 | 0 | xmlOutputBufferWriteString(buf, "\n"); |
958 | 0 | } |
959 | 0 | return; |
960 | 0 | } |
961 | 0 | xmlOutputBufferWriteString(buf, ">"); |
962 | 0 | if ((cur->type != XML_ELEMENT_NODE) && |
963 | 0 | (cur->content != NULL)) { |
964 | | /* |
965 | | * Uses the OutputBuffer property to automatically convert |
966 | | * invalids to charrefs |
967 | | */ |
968 | |
|
969 | 0 | xmlOutputBufferWriteString(buf, (const char *) cur->content); |
970 | 0 | } |
971 | 0 | if (cur->children != NULL) { |
972 | 0 | if ((format) && (info != NULL) && (!info->isinline) && |
973 | 0 | (cur->children->type != HTML_TEXT_NODE) && |
974 | 0 | (cur->children->type != HTML_ENTITY_REF_NODE) && |
975 | 0 | (cur->children != cur->last) && |
976 | 0 | (cur->name != NULL) && |
977 | 0 | (cur->name[0] != 'p')) /* p, pre, param */ |
978 | 0 | xmlOutputBufferWriteString(buf, "\n"); |
979 | 0 | htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format); |
980 | 0 | if ((format) && (info != NULL) && (!info->isinline) && |
981 | 0 | (cur->last->type != HTML_TEXT_NODE) && |
982 | 0 | (cur->last->type != HTML_ENTITY_REF_NODE) && |
983 | 0 | (cur->children != cur->last) && |
984 | 0 | (cur->name != NULL) && |
985 | 0 | (cur->name[0] != 'p')) /* p, pre, param */ |
986 | 0 | xmlOutputBufferWriteString(buf, "\n"); |
987 | 0 | } |
988 | 0 | xmlOutputBufferWriteString(buf, "</"); |
989 | 0 | if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { |
990 | 0 | xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); |
991 | 0 | xmlOutputBufferWriteString(buf, ":"); |
992 | 0 | } |
993 | 0 | xmlOutputBufferWriteString(buf, (const char *)cur->name); |
994 | 0 | xmlOutputBufferWriteString(buf, ">"); |
995 | 0 | if ((format) && (info != NULL) && (!info->isinline) && |
996 | 0 | (cur->next != NULL)) { |
997 | 0 | if ((cur->next->type != HTML_TEXT_NODE) && |
998 | 0 | (cur->next->type != HTML_ENTITY_REF_NODE) && |
999 | 0 | (cur->parent != NULL) && |
1000 | 0 | (cur->parent->name != NULL) && |
1001 | 0 | (cur->parent->name[0] != 'p')) /* p, pre, param */ |
1002 | 0 | xmlOutputBufferWriteString(buf, "\n"); |
1003 | 0 | } |
1004 | 0 | } |
1005 | | |
1006 | | /** |
1007 | | * htmlNodeDumpOutput: |
1008 | | * @buf: the HTML buffer output |
1009 | | * @doc: the document |
1010 | | * @cur: the current node |
1011 | | * @encoding: the encoding string |
1012 | | * |
1013 | | * Dump an HTML node, recursive behaviour,children are printed too, |
1014 | | * and formatting returns/spaces are added. |
1015 | | */ |
1016 | | void |
1017 | | htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, |
1018 | 0 | xmlNodePtr cur, const char *encoding) { |
1019 | 0 | htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1); |
1020 | 0 | } |
1021 | | |
1022 | | /** |
1023 | | * htmlDocContentDumpFormatOutput: |
1024 | | * @buf: the HTML buffer output |
1025 | | * @cur: the document |
1026 | | * @encoding: the encoding string |
1027 | | * @format: should formatting spaces been added |
1028 | | * |
1029 | | * Dump an HTML document. |
1030 | | */ |
1031 | | void |
1032 | | htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, |
1033 | 0 | const char *encoding, int format) { |
1034 | 0 | int type; |
1035 | |
|
1036 | 0 | xmlInitParser(); |
1037 | |
|
1038 | 0 | if ((buf == NULL) || (cur == NULL)) |
1039 | 0 | return; |
1040 | | |
1041 | | /* |
1042 | | * force to output the stuff as HTML, especially for entities |
1043 | | */ |
1044 | 0 | type = cur->type; |
1045 | 0 | cur->type = XML_HTML_DOCUMENT_NODE; |
1046 | 0 | if (cur->intSubset != NULL) { |
1047 | 0 | htmlDtdDumpOutput(buf, cur, NULL); |
1048 | 0 | } |
1049 | 0 | if (cur->children != NULL) { |
1050 | 0 | htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format); |
1051 | 0 | } |
1052 | 0 | xmlOutputBufferWriteString(buf, "\n"); |
1053 | 0 | cur->type = (xmlElementType) type; |
1054 | 0 | } |
1055 | | |
1056 | | /** |
1057 | | * htmlDocContentDumpOutput: |
1058 | | * @buf: the HTML buffer output |
1059 | | * @cur: the document |
1060 | | * @encoding: the encoding string |
1061 | | * |
1062 | | * Dump an HTML document. Formating return/spaces are added. |
1063 | | */ |
1064 | | void |
1065 | | htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, |
1066 | 0 | const char *encoding) { |
1067 | 0 | htmlDocContentDumpFormatOutput(buf, cur, encoding, 1); |
1068 | 0 | } |
1069 | | |
1070 | | /************************************************************************ |
1071 | | * * |
1072 | | * Saving functions front-ends * |
1073 | | * * |
1074 | | ************************************************************************/ |
1075 | | |
1076 | | /** |
1077 | | * htmlDocDump: |
1078 | | * @f: the FILE* |
1079 | | * @cur: the document |
1080 | | * |
1081 | | * Dump an HTML document to an open FILE. |
1082 | | * |
1083 | | * returns: the number of byte written or -1 in case of failure. |
1084 | | */ |
1085 | | int |
1086 | 0 | htmlDocDump(FILE *f, xmlDocPtr cur) { |
1087 | 0 | xmlOutputBufferPtr buf; |
1088 | 0 | xmlCharEncodingHandlerPtr handler = NULL; |
1089 | 0 | const char *encoding; |
1090 | 0 | int ret; |
1091 | |
|
1092 | 0 | xmlInitParser(); |
1093 | |
|
1094 | 0 | if ((cur == NULL) || (f == NULL)) { |
1095 | 0 | return(-1); |
1096 | 0 | } |
1097 | | |
1098 | 0 | encoding = (const char *) htmlGetMetaEncoding(cur); |
1099 | |
|
1100 | 0 | if (encoding != NULL) { |
1101 | 0 | xmlCharEncoding enc; |
1102 | |
|
1103 | 0 | enc = xmlParseCharEncoding(encoding); |
1104 | 0 | if (enc != cur->charset) { |
1105 | 0 | if (cur->charset != XML_CHAR_ENCODING_UTF8) { |
1106 | | /* |
1107 | | * Not supported yet |
1108 | | */ |
1109 | 0 | return(-1); |
1110 | 0 | } |
1111 | | |
1112 | 0 | handler = xmlFindCharEncodingHandler(encoding); |
1113 | 0 | if (handler == NULL) |
1114 | 0 | htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); |
1115 | 0 | } else { |
1116 | 0 | handler = xmlFindCharEncodingHandler(encoding); |
1117 | 0 | } |
1118 | 0 | } |
1119 | | |
1120 | | /* |
1121 | | * Fallback to HTML or ASCII when the encoding is unspecified |
1122 | | */ |
1123 | 0 | if (handler == NULL) |
1124 | 0 | handler = xmlFindCharEncodingHandler("HTML"); |
1125 | 0 | if (handler == NULL) |
1126 | 0 | handler = xmlFindCharEncodingHandler("ascii"); |
1127 | |
|
1128 | 0 | buf = xmlOutputBufferCreateFile(f, handler); |
1129 | 0 | if (buf == NULL) return(-1); |
1130 | 0 | htmlDocContentDumpOutput(buf, cur, NULL); |
1131 | |
|
1132 | 0 | ret = xmlOutputBufferClose(buf); |
1133 | 0 | return(ret); |
1134 | 0 | } |
1135 | | |
1136 | | /** |
1137 | | * htmlSaveFile: |
1138 | | * @filename: the filename (or URL) |
1139 | | * @cur: the document |
1140 | | * |
1141 | | * Dump an HTML document to a file. If @filename is "-" the stdout file is |
1142 | | * used. |
1143 | | * returns: the number of byte written or -1 in case of failure. |
1144 | | */ |
1145 | | int |
1146 | 0 | htmlSaveFile(const char *filename, xmlDocPtr cur) { |
1147 | 0 | xmlOutputBufferPtr buf; |
1148 | 0 | xmlCharEncodingHandlerPtr handler = NULL; |
1149 | 0 | const char *encoding; |
1150 | 0 | int ret; |
1151 | |
|
1152 | 0 | if ((cur == NULL) || (filename == NULL)) |
1153 | 0 | return(-1); |
1154 | | |
1155 | 0 | xmlInitParser(); |
1156 | |
|
1157 | 0 | encoding = (const char *) htmlGetMetaEncoding(cur); |
1158 | |
|
1159 | 0 | if (encoding != NULL) { |
1160 | 0 | xmlCharEncoding enc; |
1161 | |
|
1162 | 0 | enc = xmlParseCharEncoding(encoding); |
1163 | 0 | if (enc != cur->charset) { |
1164 | 0 | if (cur->charset != XML_CHAR_ENCODING_UTF8) { |
1165 | | /* |
1166 | | * Not supported yet |
1167 | | */ |
1168 | 0 | return(-1); |
1169 | 0 | } |
1170 | | |
1171 | 0 | handler = xmlFindCharEncodingHandler(encoding); |
1172 | 0 | if (handler == NULL) |
1173 | 0 | htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); |
1174 | 0 | } |
1175 | 0 | } |
1176 | | |
1177 | | /* |
1178 | | * Fallback to HTML or ASCII when the encoding is unspecified |
1179 | | */ |
1180 | 0 | if (handler == NULL) |
1181 | 0 | handler = xmlFindCharEncodingHandler("HTML"); |
1182 | 0 | if (handler == NULL) |
1183 | 0 | handler = xmlFindCharEncodingHandler("ascii"); |
1184 | | |
1185 | | /* |
1186 | | * save the content to a temp buffer. |
1187 | | */ |
1188 | 0 | buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression); |
1189 | 0 | if (buf == NULL) return(0); |
1190 | | |
1191 | 0 | htmlDocContentDumpOutput(buf, cur, NULL); |
1192 | |
|
1193 | 0 | ret = xmlOutputBufferClose(buf); |
1194 | 0 | return(ret); |
1195 | 0 | } |
1196 | | |
1197 | | /** |
1198 | | * htmlSaveFileFormat: |
1199 | | * @filename: the filename |
1200 | | * @cur: the document |
1201 | | * @format: should formatting spaces been added |
1202 | | * @encoding: the document encoding |
1203 | | * |
1204 | | * Dump an HTML document to a file using a given encoding. |
1205 | | * |
1206 | | * returns: the number of byte written or -1 in case of failure. |
1207 | | */ |
1208 | | int |
1209 | | htmlSaveFileFormat(const char *filename, xmlDocPtr cur, |
1210 | 0 | const char *encoding, int format) { |
1211 | 0 | xmlOutputBufferPtr buf; |
1212 | 0 | xmlCharEncodingHandlerPtr handler = NULL; |
1213 | 0 | int ret; |
1214 | |
|
1215 | 0 | if ((cur == NULL) || (filename == NULL)) |
1216 | 0 | return(-1); |
1217 | | |
1218 | 0 | xmlInitParser(); |
1219 | |
|
1220 | 0 | if (encoding != NULL) { |
1221 | 0 | xmlCharEncoding enc; |
1222 | |
|
1223 | 0 | enc = xmlParseCharEncoding(encoding); |
1224 | 0 | if (enc != cur->charset) { |
1225 | 0 | if (cur->charset != XML_CHAR_ENCODING_UTF8) { |
1226 | | /* |
1227 | | * Not supported yet |
1228 | | */ |
1229 | 0 | return(-1); |
1230 | 0 | } |
1231 | | |
1232 | 0 | handler = xmlFindCharEncodingHandler(encoding); |
1233 | 0 | if (handler == NULL) |
1234 | 0 | htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); |
1235 | 0 | } |
1236 | 0 | htmlSetMetaEncoding(cur, (const xmlChar *) encoding); |
1237 | 0 | } else { |
1238 | 0 | htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8"); |
1239 | 0 | } |
1240 | | |
1241 | | /* |
1242 | | * Fallback to HTML or ASCII when the encoding is unspecified |
1243 | | */ |
1244 | 0 | if (handler == NULL) |
1245 | 0 | handler = xmlFindCharEncodingHandler("HTML"); |
1246 | 0 | if (handler == NULL) |
1247 | 0 | handler = xmlFindCharEncodingHandler("ascii"); |
1248 | | |
1249 | | /* |
1250 | | * save the content to a temp buffer. |
1251 | | */ |
1252 | 0 | buf = xmlOutputBufferCreateFilename(filename, handler, 0); |
1253 | 0 | if (buf == NULL) return(0); |
1254 | | |
1255 | 0 | htmlDocContentDumpFormatOutput(buf, cur, encoding, format); |
1256 | |
|
1257 | 0 | ret = xmlOutputBufferClose(buf); |
1258 | 0 | return(ret); |
1259 | 0 | } |
1260 | | |
1261 | | /** |
1262 | | * htmlSaveFileEnc: |
1263 | | * @filename: the filename |
1264 | | * @cur: the document |
1265 | | * @encoding: the document encoding |
1266 | | * |
1267 | | * Dump an HTML document to a file using a given encoding |
1268 | | * and formatting returns/spaces are added. |
1269 | | * |
1270 | | * returns: the number of byte written or -1 in case of failure. |
1271 | | */ |
1272 | | int |
1273 | 0 | htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) { |
1274 | 0 | return(htmlSaveFileFormat(filename, cur, encoding, 1)); |
1275 | 0 | } |
1276 | | |
1277 | | #endif /* LIBXML_OUTPUT_ENABLED */ |
1278 | | |
1279 | | #define bottom_HTMLtree |
1280 | | #include "elfgcchack.h" |
1281 | | #endif /* LIBXML_HTML_ENABLED */ |