Coverage Report

Created: 2026-01-25 07:18

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/gettext-0.26/gettext-tools/libgettextpo/markup.c
Line
Count
Source
1
/* markup.c -- simple XML-like parser
2
   Copyright (C) 2015-2025 Free Software Foundation, Inc.
3
4
   This file is not part of the GNU gettext program, but is used with
5
   GNU gettext.
6
7
   This is a stripped down version of GLib's gmarkup.c.  The original
8
   copyright notice is as follows:
9
*/
10
11
/* gmarkup.c - Simple XML-like parser
12
 *
13
 *  Copyright 2000, 2003 Red Hat, Inc.
14
 *  Copyright 2007, 2008 Ryan Lortie <desrt@desrt.ca>
15
 *
16
 * GLib is free software; you can redistribute it and/or modify it
17
 * under the terms of the GNU General Public License as
18
 * published by the Free Software Foundation; either version 3 of the
19
 * License, or (at your option) any later version.
20
 *
21
 * GLib is distributed in the hope that it will be useful,
22
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
24
 * General Public License for more details.
25
 *
26
 * You should have received a copy of the GNU General Public
27
 * License along with GLib; see the file COPYING.LIB.  If not,
28
 * see <https://www.gnu.org/licenses/>.
29
 */
30
31
#include "config.h"
32
33
#include <assert.h>
34
#include <stdarg.h>
35
#include <string.h>
36
#include <stdio.h>
37
#include <stdlib.h>
38
39
/* Specification */
40
#include "markup.h"
41
42
#include "c-ctype.h"
43
#include "gettext.h"
44
#include "gl_linked_list.h"
45
#include "gl_xlist.h"
46
#include "unictype.h"
47
#include "unistr.h"
48
#include "xalloc.h"
49
#include "xvasprintf.h"
50
#include "xstrerror.h"
51
52
0
/* Shorthand for marking and translating user-visible messages.  */
#define _(msgid) gettext (msgid)
53
54
/**
55
 * The "markup" parser is intended to parse a simple markup format
56
 * that's a subset of XML.  This is a small, efficient, easy-to-use
57
 * parser.  It should not be used if you expect to interoperate with
58
 * other applications generating full-scale XML.  However, it's very
59
 * useful for application data files, config files, etc. where you
60
 * know your application will be the only one writing the file.
61
 * Full-scale XML parsers should be able to parse the subset used by
62
 * markup, so you can easily migrate to full-scale XML at a later
63
 * time if the need arises.
64
 *
65
 * The parser is not guaranteed to signal an error on all invalid XML;
66
 * the parser may accept documents that an XML parser would not.
67
 * However, XML documents which are not well-formed (which is a weaker
68
 * condition than being valid.  See the XML specification
69
 * <https://www.w3.org/TR/REC-xml/> for definitions of these terms.)
70
 * are not considered valid GMarkup documents.
71
 *
72
 * Simplifications to XML include:
73
 *
74
 * - Only UTF-8 encoding is allowed
75
 *
76
 * - No user-defined entities
77
 *
78
 * - Processing instructions, comments and the doctype declaration
79
 *   are "passed through" but are not interpreted in any way
80
 *
81
 * - No DTD or validation
82
 *
83
 * The markup format does support:
84
 *
85
 * - Elements
86
 *
87
 * - Attributes
88
 *
89
 * - 5 standard entities: &amp; &lt; &gt; &quot; &apos;
90
 *
91
 * - Character references
92
 *
93
 * - Sections marked as CDATA
94
 */
95
96
/* States of the parser.  A new context starts in STATE_START (see
   markup_parse_context_new); emit_error moves it to STATE_ERROR.  */
typedef enum
{
  STATE_START,
  STATE_AFTER_OPEN_ANGLE,
  STATE_AFTER_CLOSE_ANGLE,
  /* After the slash that obviates the need for an end element.  */
  STATE_AFTER_ELISION_SLASH,
  STATE_INSIDE_OPEN_TAG_NAME,
  STATE_INSIDE_ATTRIBUTE_NAME,
  STATE_AFTER_ATTRIBUTE_NAME,
  STATE_BETWEEN_ATTRIBUTES,
  STATE_AFTER_ATTRIBUTE_EQUALS_SIGN,
  /* Attribute value delimited by single quotes (SQ) or double quotes (DQ);
     NOTE(review): meaning inferred from the names.  */
  STATE_INSIDE_ATTRIBUTE_VALUE_SQ,
  STATE_INSIDE_ATTRIBUTE_VALUE_DQ,
  STATE_INSIDE_TEXT,
  STATE_AFTER_CLOSE_TAG_SLASH,
  STATE_INSIDE_CLOSE_TAG_NAME,
  STATE_AFTER_CLOSE_TAG_NAME,
  STATE_INSIDE_PASSTHROUGH,
  STATE_ERROR
} markup_parse_state_ty;
116
117
typedef struct
118
{
119
  const char *prev_element;
120
  const markup_parser_ty *prev_parser;
121
  void *prev_user_data;
122
} markup_recursion_tracker_ty;
123
124
/* Growable byte string, kept NUL-terminated by the functions that
   modify it.  */
typedef struct
{
  /* Heap storage; NULL for a freshly created string.  */
  char *buffer;
  /* Number of bytes allocated for BUFFER.  */
  size_t bufmax;
  /* Number of bytes in use, not counting the terminating NUL.  */
  size_t buflen;
} markup_string_ty;
130
131
struct _markup_parse_context_ty
132
{
133
  const markup_parser_ty *parser;
134
135
  markup_parse_flags_ty flags;
136
137
  int line_number;
138
  int char_number;
139
140
  markup_parse_state_ty state;
141
142
  void *user_data;
143
144
  /* A piece of character data or an element that
145
   * hasn't "ended" yet so we haven't yet called
146
   * the callback for it.
147
   */
148
  markup_string_ty *partial_chunk;
149
150
  gl_list_t tag_stack;          /* <markup_string_ty> */
151
152
  char **attr_names;
153
  char **attr_values;
154
  int cur_attr;
155
  int alloc_attrs;
156
157
  const char *current_text;
158
  ssize_t current_text_len;
159
  const char *current_text_end;
160
161
  /* used to save the start of the last interesting thingy */
162
  const char *start;
163
164
  const char *iter;
165
166
  char *error_text;
167
168
  unsigned int document_empty : 1;
169
  unsigned int parsing : 1;
170
  unsigned int awaiting_pop : 1;
171
  int balance;
172
173
  /* subparser support */
174
  gl_list_t subparser_stack;    /* <markup_recursion_tracker_ty *> */
175
  const char *subparser_element;
176
};
177
178
static markup_string_ty *
179
markup_string_new (void)
180
0
{
181
0
  return XZALLOC (markup_string_ty);
182
0
}
183
184
static char *
185
markup_string_free (markup_string_ty *string, bool free_segment)
186
0
{
187
0
  if (free_segment)
188
0
    {
189
0
      free (string->buffer);
190
0
      free (string);
191
0
      return NULL;
192
0
    }
193
0
  else
194
0
    {
195
0
      char *result = string->buffer;
196
0
      free (string);
197
0
      return result;
198
0
    }
199
0
}
200
201
static void
202
markup_string_free1 (markup_string_ty *string)
203
0
{
204
0
  markup_string_free (string, true);
205
0
}
206
207
static void
208
markup_string_truncate (markup_string_ty *string, size_t length)
209
0
{
210
0
  assert (string && length < string->buflen - 1);
211
0
  string->buffer[length] = '\0';
212
0
  string->buflen = length;
213
0
}
214
215
static void
216
markup_string_append (markup_string_ty *string, const char *to_append,
217
                      size_t length)
218
0
{
219
0
  if (string->buflen + length + 1 > string->bufmax)
220
0
    {
221
0
      string->bufmax *= 2;
222
0
      if (string->buflen + length + 1 > string->bufmax)
223
0
        string->bufmax = string->buflen + length + 1;
224
0
      string->buffer = xrealloc (string->buffer, string->bufmax);
225
0
    }
226
0
  memcpy (string->buffer + string->buflen, to_append, length);
227
0
  string->buffer[length] = '\0';
228
0
  string->buflen = length;
229
0
}
230
231
static inline void
232
string_blank (markup_string_ty *string)
233
0
{
234
0
  if (string->bufmax > 0)
235
0
    {
236
0
      *string->buffer = '\0';
237
0
      string->buflen = 0;
238
0
    }
239
0
}
240
241
/* Creates a new parse context.  A parse context is used to parse
242
   marked-up documents.  You can feed any number of documents into a
243
   context, as long as no errors occur; once an error occurs, the
244
   parse context can't continue to parse text (you have to free it and
245
   create a new parse context).  */
246
markup_parse_context_ty *
247
markup_parse_context_new (const markup_parser_ty *parser,
248
                          markup_parse_flags_ty flags,
249
                          void *user_data)
250
0
{
251
0
  markup_parse_context_ty *context;
252
253
0
  assert (parser != NULL);
254
255
0
  context = XMALLOC (markup_parse_context_ty);
256
257
0
  context->parser = parser;
258
0
  context->flags = flags;
259
0
  context->user_data = user_data;
260
261
0
  context->line_number = 1;
262
0
  context->char_number = 1;
263
264
0
  context->partial_chunk = NULL;
265
266
0
  context->state = STATE_START;
267
0
  context->tag_stack =
268
0
    gl_list_create_empty (GL_LINKED_LIST,
269
0
                          NULL, NULL,
270
0
                          (gl_listelement_dispose_fn) markup_string_free1,
271
0
                          true);
272
0
  context->attr_names = NULL;
273
0
  context->attr_values = NULL;
274
0
  context->cur_attr = -1;
275
0
  context->alloc_attrs = 0;
276
277
0
  context->current_text = NULL;
278
0
  context->current_text_len = -1;
279
0
  context->current_text_end = NULL;
280
281
0
  context->start = NULL;
282
0
  context->iter = NULL;
283
284
0
  context->error_text = NULL;
285
286
0
  context->document_empty = true;
287
0
  context->parsing = false;
288
289
0
  context->awaiting_pop = false;
290
0
  context->subparser_stack =
291
0
    gl_list_create_empty (GL_LINKED_LIST,
292
0
                          NULL, NULL,
293
0
                          (gl_listelement_dispose_fn) free,
294
0
                          true);
295
0
  context->subparser_element = NULL;
296
297
0
  context->balance = 0;
298
299
0
  return context;
300
0
}
301
302
static void clear_attributes (markup_parse_context_ty *context);
303
304
/* Frees a parse context.  This function can't be called from inside
305
   one of the markup_parser_ty functions or while a subparser is
306
   pushed.  */
307
void
308
markup_parse_context_free (markup_parse_context_ty *context)
309
0
{
310
0
  assert (context != NULL);
311
0
  assert (!context->parsing);
312
0
  assert (gl_list_size (context->subparser_stack) == 0);
313
0
  assert (!context->awaiting_pop);
314
315
0
  clear_attributes (context);
316
0
  free (context->attr_names);
317
0
  free (context->attr_values);
318
319
0
  gl_list_free (context->tag_stack);
320
0
  gl_list_free (context->subparser_stack);
321
322
0
  if (context->partial_chunk)
323
0
    markup_string_free (context->partial_chunk, true);
324
325
0
  free (context->error_text);
326
327
0
  free (context);
328
0
}
329
330
static void pop_subparser_stack (markup_parse_context_ty *context);
331
332
static void
333
emit_error (markup_parse_context_ty *context, const char *error_text)
334
0
{
335
0
  context->state = STATE_ERROR;
336
337
0
  if (context->parser->error)
338
0
    (*context->parser->error) (context, error_text, context->user_data);
339
340
  /* report the error all the way up to free all the user-data */
341
0
  while (gl_list_size (context->subparser_stack) > 0)
342
0
    {
343
0
      pop_subparser_stack (context);
344
0
      context->awaiting_pop = false; /* already been freed */
345
346
0
      if (context->parser->error)
347
0
        (*context->parser->error) (context, error_text, context->user_data);
348
0
    }
349
350
0
  if (context->error_text)
351
0
    free (context->error_text);
352
0
  context->error_text = xstrdup (error_text);
353
0
}
354
355
/* True if C is one of the characters '=', '/', '>' or ' ', which are
   treated as ending a name.  C is evaluated more than once, so it must
   have no side effects.  */
#define IS_COMMON_NAME_END_CHAR(c) \
  ((c) == '=' || (c) == '/' || (c) == '>' || (c) == ' ')
357
358
static bool
359
slow_name_validate (markup_parse_context_ty *context, const char *name)
360
0
{
361
0
  const char *p = name;
362
0
  ucs4_t uc;
363
364
0
  if (u8_check ((const uint8_t *) name, strlen (name)) != NULL)
365
0
    {
366
0
      emit_error (context, _("invalid UTF-8 sequence"));
367
0
      return false;
368
0
    }
369
370
0
  if (!(c_isalpha (*p)
371
0
        || (!IS_COMMON_NAME_END_CHAR (*p)
372
0
            && (*p == '_'
373
0
                || *p == ':'
374
0
                || (u8_mbtouc (&uc, (const uint8_t *) name, strlen (name)) > 0
375
0
                    && uc_is_alpha (uc))))))
376
0
    {
377
0
      char *error_text = xasprintf (_("'%s' is not a valid name: %c"),
378
0
                                    name, *p);
379
0
      emit_error (context, error_text);
380
0
      free (error_text);
381
0
      return false;
382
0
    }
383
384
0
  for (p = (const char *) u8_next (&uc, (const uint8_t *) name);
385
0
       p != NULL;
386
0
       p = (const char *) u8_next (&uc, (const uint8_t *) p))
387
0
    {
388
      /* is_name_char */
389
0
      if (!(c_isalnum (*p)
390
0
            || (!IS_COMMON_NAME_END_CHAR (*p)
391
0
                && (*p == '.' || *p == '-' || *p == '_' || *p == ':'
392
0
                    || uc_is_alpha (uc)))))
393
0
        {
394
0
          char *error_text = xasprintf (_("'%s' is not a valid name: '%c'"),
395
0
                                        name, *p);
396
0
          emit_error (context, error_text);
397
0
          free (error_text);
398
0
          return false;
399
0
        }
400
0
    }
401
0
  return true;
402
0
}
403
404
/*
405
 * Use me for elements, attributes etc.
406
 */
407
static bool
408
name_validate (markup_parse_context_ty *context, const char *name)
409
0
{
410
0
  char mask;
411
0
  const char *p;
412
413
  /* name start char */
414
0
  p = name;
415
0
  if (IS_COMMON_NAME_END_CHAR (*p)
416
0
      || !(c_isalpha (*p) || *p == '_' || *p == ':'))
417
0
    goto slow_validate;
418
419
0
  for (mask = *p++; *p != '\0'; p++)
420
0
    {
421
0
      mask |= *p;
422
423
      /* is_name_char */
424
0
      if (!(c_isalnum (*p)
425
0
            || (!IS_COMMON_NAME_END_CHAR (*p)
426
0
                && (*p == '.' || *p == '-' || *p == '_' || *p == ':'))))
427
0
        goto slow_validate;
428
0
    }
429
430
0
  if (mask & 0x80) /* un-common / non-ascii */
431
0
    goto slow_validate;
432
433
0
  return true;
434
435
0
 slow_validate:
436
0
  return slow_name_validate (context, name);
437
0
}
438
439
static bool
440
text_validate (markup_parse_context_ty *context,
441
               const char *p,
442
               int len)
443
0
{
444
0
  if (u8_check ((const uint8_t *) p, len) != NULL)
445
0
    {
446
0
      emit_error (context, _("invalid UTF-8 sequence"));
447
0
      return false;
448
0
    }
449
0
  else
450
0
    return true;
451
0
}
452
453
/*
454
 * re-write the GString in-place, unescaping anything that escaped.
455
 * most XML does not contain entities, or escaping.
456
 */
457
static bool
458
unescape_string_inplace (markup_parse_context_ty *context,
459
                         markup_string_ty *string,
460
                         bool *is_ascii)
461
0
{
462
0
  char mask, *to;
463
0
  const char *from;
464
0
  bool normalize_attribute;
465
466
0
  if (string->buflen == 0)
467
0
    return true;
468
469
0
  *is_ascii = false;
470
471
  /* are we unescaping an attribute or not ? */
472
0
  if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ
473
0
      || context->state == STATE_INSIDE_ATTRIBUTE_VALUE_DQ)
474
0
    normalize_attribute = true;
475
0
  else
476
0
    normalize_attribute = false;
477
478
  /*
479
   * Meeks' theorem: unescaping can only shrink text.
480
   * for &lt; etc. this is obvious, for &#xffff; more
481
   * thought is required, but this is patently so.
482
   */
483
0
  mask = 0;
484
0
  for (from = to = string->buffer; *from != '\0'; from++, to++)
485
0
    {
486
0
      *to = *from;
487
488
0
      mask |= *to;
489
0
      if (normalize_attribute && (*to == '\t' || *to == '\n'))
490
0
        *to = ' ';
491
0
      if (*to == '\r')
492
0
        {
493
0
          *to = normalize_attribute ? ' ' : '\n';
494
0
          if (from[1] == '\n')
495
0
            from++;
496
0
        }
497
0
      if (*from == '&')
498
0
        {
499
0
          from++;
500
0
          if (*from == '#')
501
0
            {
502
0
              int base = 10;
503
0
              unsigned long l;
504
0
              char *end = NULL;
505
506
0
              from++;
507
508
0
              if (*from == 'x')
509
0
                {
510
0
                  base = 16;
511
0
                  from++;
512
0
                }
513
514
0
              if (!(base == 16 ? c_isxdigit (*from) : c_isdigit (*from))
515
0
                  || /* No need to reset and test errno here, because in case
516
                        of overflow, l will be == ULONG_MAX, which is
517
                        > 0x10FFFF.  */
518
0
                     (l = strtoul (from, &end, base),
519
0
                      end == from))
520
0
                {
521
0
                  char *error_text =
522
0
                    xasprintf (_("invalid character reference: %s"),
523
0
                               _("not a valid number specification"));
524
0
                  emit_error (context, error_text);
525
0
                  free (error_text);
526
0
                  return false;
527
0
                }
528
0
              else if (*end != ';')
529
0
                {
530
0
                  char *error_text =
531
0
                    xasprintf (_("invalid character reference: %s"),
532
0
                               _("no ending ';'"));
533
0
                  emit_error (context, error_text);
534
0
                  free (error_text);
535
0
                  return false;
536
0
                }
537
0
              else if (/* characters XML 1.1 permits */
538
0
                       (0 < l && l <= 0xD7FF)
539
0
                       || (0xE000 <= l && l <= 0xFFFD) || (0x10000 <= l && l <= 0x10FFFF))
540
0
                {
541
0
                  char buf[8];
542
0
                  int length;
543
0
                  length = u8_uctomb ((uint8_t *) buf, l, 8);
544
0
                  memcpy (to, buf, length);
545
0
                  to += length - 1;
546
0
                  from = end;
547
0
                  if (l >= 0x80) /* not ASCII */
548
0
                    mask |= 0x80;
549
0
                }
550
0
              else
551
0
                {
552
0
                  char *error_text =
553
0
                    xasprintf (_("invalid character reference: %s"),
554
0
                               _("non-permitted character"));
555
0
                  emit_error (context, error_text);
556
0
                  free (error_text);
557
0
                  return false;
558
0
                }
559
0
            }
560
561
0
          else if (strncmp (from, "lt;", 3) == 0)
562
0
            {
563
0
              *to = '<';
564
0
              from += 2;
565
0
            }
566
0
          else if (strncmp (from, "gt;", 3) == 0)
567
0
            {
568
0
              *to = '>';
569
0
              from += 2;
570
0
            }
571
0
          else if (strncmp (from, "amp;", 4) == 0)
572
0
            {
573
0
              *to = '&';
574
0
              from += 3;
575
0
            }
576
0
          else if (strncmp (from, "quot;", 5) == 0)
577
0
            {
578
0
              *to = '"';
579
0
              from += 4;
580
0
            }
581
0
          else if (strncmp (from, "apos;", 5) == 0)
582
0
            {
583
0
              *to = '\'';
584
0
              from += 4;
585
0
            }
586
0
          else
587
0
            {
588
0
              const char *reason;
589
0
              char *error_text;
590
591
0
              if (*from == ';')
592
0
                reason = _("empty");
593
0
              else
594
0
                {
595
0
                  const char *end = strchr (from, ';');
596
0
                  if (end)
597
0
                    reason = _("unknown");
598
0
                  else
599
0
                    reason = _("no ending ';'");
600
0
                }
601
0
              error_text = xasprintf (_("invalid entity reference: %s"),
602
0
                                      reason);
603
0
              emit_error (context, error_text);
604
0
              free (error_text);
605
0
              return false;
606
0
            }
607
0
        }
608
0
    }
609
610
0
  assert (to - string->buffer <= string->buflen);
611
0
  if (to - string->buffer != string->buflen)
612
0
    markup_string_truncate (string, to - string->buffer);
613
614
0
  *is_ascii = !(mask & 0x80);
615
616
0
  return true;
617
0
}
618
619
static inline bool
620
advance_char (markup_parse_context_ty *context)
621
0
{
622
0
  context->iter++;
623
0
  context->char_number++;
624
625
0
  if (context->iter == context->current_text_end)
626
0
      return false;
627
628
0
  else if (*context->iter == '\n')
629
0
    {
630
0
      context->line_number++;
631
0
      context->char_number = 1;
632
0
    }
633
634
0
  return true;
635
0
}
636
637
/* Returns true if C is a space, tab, newline or carriage return.  */
static inline bool
xml_isspace (char c)
{
  switch (c)
    {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
      return true;
    default:
      return false;
    }
}
642
643
static void
644
skip_spaces (markup_parse_context_ty *context)
645
0
{
646
0
  do
647
0
    {
648
0
      if (!xml_isspace (*context->iter))
649
0
        return;
650
0
    }
651
0
  while (advance_char (context));
652
0
}
653
654
static void
655
advance_to_name_end (markup_parse_context_ty *context)
656
0
{
657
0
  do
658
0
    {
659
0
      if (IS_COMMON_NAME_END_CHAR (*(context->iter)))
660
0
        return;
661
0
      if (xml_isspace (*(context->iter)))
662
0
        return;
663
0
    }
664
0
  while (advance_char (context));
665
0
}
666
667
static void
668
add_to_partial (markup_parse_context_ty *context,
669
                const char         *text_start,
670
                const char         *text_end)
671
0
{
672
0
  if (context->partial_chunk == NULL)
673
0
    { /* allocate a new chunk to parse into */
674
675
0
      context->partial_chunk = markup_string_new ();
676
0
    }
677
678
0
  if (text_start != text_end)
679
0
    markup_string_append (context->partial_chunk,
680
0
                          text_start, text_end - text_start);
681
0
}
682
683
static inline void
684
truncate_partial (markup_parse_context_ty *context)
685
0
{
686
0
  if (context->partial_chunk != NULL)
687
0
    string_blank (context->partial_chunk);
688
0
}
689
690
static inline const char*
691
current_element (markup_parse_context_ty *context)
692
0
{
693
0
  const markup_string_ty *string = gl_list_get_at (context->tag_stack, 0);
694
0
  return string->buffer;
695
0
}
696
697
static void
698
pop_subparser_stack (markup_parse_context_ty *context)
699
0
{
700
0
  markup_recursion_tracker_ty *tracker;
701
702
0
  assert (gl_list_size (context->subparser_stack) > 0);
703
704
0
  tracker = (markup_recursion_tracker_ty *) gl_list_get_at (context->subparser_stack, 0);
705
706
0
  context->awaiting_pop = true;
707
708
0
  context->user_data = tracker->prev_user_data;
709
0
  context->parser = tracker->prev_parser;
710
0
  context->subparser_element = tracker->prev_element;
711
0
  free (tracker);
712
713
0
  gl_list_remove_at (context->subparser_stack, 0);
714
0
}
715
716
static void
717
push_partial_as_tag (markup_parse_context_ty *context)
718
0
{
719
0
  gl_list_add_first (context->tag_stack, context->partial_chunk);
720
0
  context->partial_chunk = NULL;
721
0
}
722
723
static void
724
pop_tag (markup_parse_context_ty *context)
725
0
{
726
0
  gl_list_remove_at (context->tag_stack, 0);
727
0
}
728
729
static void
730
possibly_finish_subparser (markup_parse_context_ty *context)
731
0
{
732
0
  if (current_element (context) == context->subparser_element)
733
0
    pop_subparser_stack (context);
734
0
}
735
736
static void
737
ensure_no_outstanding_subparser (markup_parse_context_ty *context)
738
0
{
739
0
  context->awaiting_pop = false;
740
0
}
741
742
static void
743
add_attribute (markup_parse_context_ty *context, markup_string_ty *string)
744
0
{
745
0
  if (context->cur_attr + 2 >= context->alloc_attrs)
746
0
    {
747
0
      context->alloc_attrs += 5; /* silly magic number */
748
0
      context->attr_names = xrealloc (context->attr_names, sizeof (char *) * context->alloc_attrs);
749
0
      context->attr_values = xrealloc (context->attr_values, sizeof(char *) * context->alloc_attrs);
750
0
    }
751
0
  context->cur_attr++;
752
0
  context->attr_names[context->cur_attr] = xstrdup (string->buffer);
753
0
  context->attr_values[context->cur_attr] = NULL;
754
0
  context->attr_names[context->cur_attr+1] = NULL;
755
0
  context->attr_values[context->cur_attr+1] = NULL;
756
0
}
757
758
static void
759
clear_attributes (markup_parse_context_ty *context)
760
0
{
761
  /* Go ahead and free the attributes. */
762
0
  for (; context->cur_attr >= 0; context->cur_attr--)
763
0
    {
764
0
      int pos = context->cur_attr;
765
0
      free (context->attr_names[pos]);
766
0
      free (context->attr_values[pos]);
767
0
      context->attr_names[pos] = context->attr_values[pos] = NULL;
768
0
    }
769
0
  assert (context->cur_attr == -1);
770
0
  assert (context->attr_names == NULL || context->attr_names[0] == NULL);
771
0
  assert (context->attr_values == NULL || context->attr_values[0] == NULL);
772
0
}
773
774
static void
775
markup_parse_context_push (markup_parse_context_ty *context,
776
                           const markup_parser_ty *parser,
777
                           void *user_data)
778
0
{
779
0
  markup_recursion_tracker_ty *tracker;
780
781
0
  tracker = XMALLOC (markup_recursion_tracker_ty);
782
0
  tracker->prev_element = context->subparser_element;
783
0
  tracker->prev_parser = context->parser;
784
0
  tracker->prev_user_data = context->user_data;
785
786
0
  context->subparser_element = current_element (context);
787
0
  context->parser = parser;
788
0
  context->user_data = user_data;
789
790
0
  gl_list_add_first (context->subparser_stack, tracker);
791
0
}
792
793
static void
794
markup_parse_context_pop (markup_parse_context_ty *context)
795
0
{
796
0
  if (!context->awaiting_pop)
797
0
    possibly_finish_subparser (context);
798
799
0
  assert (context->awaiting_pop);
800
801
0
  context->awaiting_pop = false;
802
0
}
803
804
/* This has to be a separate function to ensure the alloca's
805
 * are unwound on exit - otherwise we grow & blow the stack
806
 * with large documents
807
 */
808
static inline void
809
emit_start_element (markup_parse_context_ty *context)
810
0
{
811
0
  int i, j = 0;
812
0
  const char *start_name;
813
0
  const char **attr_names;
814
0
  const char **attr_values;
815
816
  /* In case we want to ignore qualified tags and we see that we have
817
   * one here, we push a subparser.  This will ignore all tags inside of
818
   * the qualified tag.
819
   *
820
   * We deal with the end of the subparser from emit_end_element.
821
   */
822
0
  if ((context->flags & MARKUP_IGNORE_QUALIFIED)
823
0
      && strchr (current_element (context), ':'))
824
0
    {
825
0
      static const markup_parser_ty ignore_parser;
826
0
      markup_parse_context_push (context, &ignore_parser, NULL);
827
0
      clear_attributes (context);
828
0
      return;
829
0
    }
830
831
0
  attr_names = XCALLOC (context->cur_attr + 2, const char *);
832
0
  attr_values = XCALLOC (context->cur_attr + 2, const char *);
833
0
  for (i = 0; i < context->cur_attr + 1; i++)
834
0
    {
835
      /* Possibly omit qualified attribute names from the list */
836
0
      if ((context->flags & MARKUP_IGNORE_QUALIFIED)
837
0
          && strchr (context->attr_names[i], ':'))
838
0
        continue;
839
840
0
      attr_names[j] = context->attr_names[i];
841
0
      attr_values[j] = context->attr_values[i];
842
0
      j++;
843
0
    }
844
0
  attr_names[j] = NULL;
845
0
  attr_values[j] = NULL;
846
847
  /* Call user callback for element start */
848
0
  start_name = current_element (context);
849
850
0
  if (context->parser->start_element && name_validate (context, start_name))
851
0
    (* context->parser->start_element) (context,
852
0
                                        start_name,
853
0
                                        (const char **)attr_names,
854
0
                                        (const char **)attr_values,
855
0
                                        context->user_data);
856
0
  free (attr_names);
857
0
  free (attr_values);
858
0
  clear_attributes (context);
859
0
}
860
861
static void
862
emit_end_element (markup_parse_context_ty *context)
863
0
{
864
0
  assert (gl_list_size (context->tag_stack) != 0);
865
866
0
  possibly_finish_subparser (context);
867
868
  /* We might have just returned from our ignore subparser */
869
0
  if ((context->flags & MARKUP_IGNORE_QUALIFIED)
870
0
      && strchr (current_element (context), ':'))
871
0
    {
872
0
      markup_parse_context_pop (context);
873
0
      pop_tag (context);
874
0
      return;
875
0
    }
876
877
0
  if (context->parser->end_element)
878
0
    (* context->parser->end_element) (context,
879
0
                                      current_element (context),
880
0
                                      context->user_data);
881
882
0
  ensure_no_outstanding_subparser (context);
883
884
0
  pop_tag (context);
885
0
}
886
887
/* Feed some data to the parse context.  The data need not be valid
888
   UTF-8; an error will be signaled if it's invalid.  The data need
889
   not be an entire document; you can feed a document into the parser
890
   incrementally, via multiple calls to this function.  Typically, as
891
   you receive data from a network connection or file, you feed each
892
   received chunk of data into this function, aborting the process if
893
   an error occurs. Once an error is reported, no further data may be
894
   fed to the parse context; all errors are fatal.  */
895
bool
896
markup_parse_context_parse (markup_parse_context_ty *context,
897
                            const char *text,
898
                            ssize_t text_len)
899
0
{
900
0
  assert (context != NULL);
901
0
  assert (text != NULL);
902
0
  assert (context->state != STATE_ERROR);
903
0
  assert (!context->parsing);
904
905
0
  if (text_len < 0)
906
0
    text_len = strlen (text);
907
908
0
  if (text_len == 0)
909
0
    return true;
910
911
0
  context->parsing = true;
912
913
914
0
  context->current_text = text;
915
0
  context->current_text_len = text_len;
916
0
  context->current_text_end = context->current_text + text_len;
917
0
  context->iter = context->current_text;
918
0
  context->start = context->iter;
919
920
0
  while (context->iter != context->current_text_end)
921
0
    {
922
0
      switch (context->state)
923
0
        {
924
0
        case STATE_START:
925
          /* Possible next state: AFTER_OPEN_ANGLE */
926
927
0
          assert (gl_list_size (context->tag_stack) == 0);
928
929
          /* whitespace is ignored outside of any elements */
930
0
          skip_spaces (context);
931
932
0
          if (context->iter != context->current_text_end)
933
0
            {
934
0
              if (*context->iter == '<')
935
0
                {
936
                  /* Move after the open angle */
937
0
                  advance_char (context);
938
939
0
                  context->state = STATE_AFTER_OPEN_ANGLE;
940
941
                  /* this could start a passthrough */
942
0
                  context->start = context->iter;
943
944
                  /* document is now non-empty */
945
0
                  context->document_empty = false;
946
0
                }
947
0
              else
948
0
                {
949
0
                  emit_error (context,
950
0
                              _("document must begin with an element"));
951
0
                }
952
0
            }
953
0
          break;
954
955
0
        case STATE_AFTER_OPEN_ANGLE:
956
          /* Possible next states: INSIDE_OPEN_TAG_NAME,
957
           *  AFTER_CLOSE_TAG_SLASH, INSIDE_PASSTHROUGH
958
           */
959
0
          if (*context->iter == '?' || *context->iter == '!')
960
0
            {
961
              /* include < in the passthrough */
962
0
              const char *openangle = "<";
963
0
              add_to_partial (context, openangle, openangle + 1);
964
0
              context->start = context->iter;
965
0
              context->balance = 1;
966
0
              context->state = STATE_INSIDE_PASSTHROUGH;
967
0
            }
968
0
          else if (*context->iter == '/')
969
0
            {
970
              /* move after it */
971
0
              advance_char (context);
972
973
0
              context->state = STATE_AFTER_CLOSE_TAG_SLASH;
974
0
            }
975
0
          else if (!IS_COMMON_NAME_END_CHAR (*(context->iter)))
976
0
            {
977
0
              context->state = STATE_INSIDE_OPEN_TAG_NAME;
978
979
              /* start of tag name */
980
0
              context->start = context->iter;
981
0
            }
982
0
          else
983
0
            {
984
0
              char *error_text = xasprintf (_("invalid character after '%s'"),
985
0
                                            "<");
986
0
              emit_error (context, error_text);
987
0
              free (error_text);
988
0
            }
989
0
          break;
990
991
          /* The AFTER_CLOSE_ANGLE state is actually sort of
992
           * broken, because it doesn't correspond to a range
993
           * of characters in the input stream as the others do,
994
           * and thus makes things harder to conceptualize
995
           */
996
0
        case STATE_AFTER_CLOSE_ANGLE:
997
          /* Possible next states: INSIDE_TEXT, STATE_START */
998
0
          if (gl_list_size (context->tag_stack) == 0)
999
0
            {
1000
0
              context->start = NULL;
1001
0
              context->state = STATE_START;
1002
0
            }
1003
0
          else
1004
0
            {
1005
0
              context->start = context->iter;
1006
0
              context->state = STATE_INSIDE_TEXT;
1007
0
            }
1008
0
          break;
1009
1010
0
        case STATE_AFTER_ELISION_SLASH:
1011
          /* Possible next state: AFTER_CLOSE_ANGLE */
1012
0
          if (*context->iter == '>')
1013
0
            {
1014
              /* move after the close angle */
1015
0
              advance_char (context);
1016
0
              context->state = STATE_AFTER_CLOSE_ANGLE;
1017
0
              emit_end_element (context);
1018
0
            }
1019
0
          else
1020
0
            {
1021
0
              char *error_text = xasprintf (_("missing '%c'"), '>');
1022
0
              emit_error (context, error_text);
1023
0
              free (error_text);
1024
0
            }
1025
0
          break;
1026
1027
0
        case STATE_INSIDE_OPEN_TAG_NAME:
1028
          /* Possible next states: BETWEEN_ATTRIBUTES */
1029
1030
          /* if there's a partial chunk then it's the first part of the
1031
           * tag name. If there's a context->start then it's the start
1032
           * of the tag name in current_text, the partial chunk goes
1033
           * before that start though.
1034
           */
1035
0
          advance_to_name_end (context);
1036
1037
0
          if (context->iter == context->current_text_end)
1038
0
            {
1039
              /* The name hasn't necessarily ended. Merge with
1040
               * partial chunk, leave state unchanged.
1041
               */
1042
0
              add_to_partial (context, context->start, context->iter);
1043
0
            }
1044
0
          else
1045
0
            {
1046
              /* The name has ended. Combine it with the partial chunk
1047
               * if any; push it on the stack; enter next state.
1048
               */
1049
0
              add_to_partial (context, context->start, context->iter);
1050
0
              push_partial_as_tag (context);
1051
1052
0
              context->state = STATE_BETWEEN_ATTRIBUTES;
1053
0
              context->start = NULL;
1054
0
            }
1055
0
          break;
1056
1057
0
        case STATE_INSIDE_ATTRIBUTE_NAME:
1058
          /* Possible next states: AFTER_ATTRIBUTE_NAME */
1059
1060
0
          advance_to_name_end (context);
1061
0
          add_to_partial (context, context->start, context->iter);
1062
1063
          /* read the full name, if we enter the equals sign state
1064
           * then add the attribute to the list (without the value),
1065
           * otherwise store a partial chunk to be prepended later.
1066
           */
1067
0
          if (context->iter != context->current_text_end)
1068
0
            context->state = STATE_AFTER_ATTRIBUTE_NAME;
1069
0
          break;
1070
1071
0
        case STATE_AFTER_ATTRIBUTE_NAME:
1072
          /* Possible next states: AFTER_ATTRIBUTE_EQUALS_SIGN */
1073
1074
0
          skip_spaces (context);
1075
1076
0
          if (context->iter != context->current_text_end)
1077
0
            {
1078
              /* The name has ended. Combine it with the partial chunk
1079
               * if any; push it on the stack; enter next state.
1080
               */
1081
0
              if (!name_validate (context, context->partial_chunk->buffer))
1082
0
                break;
1083
1084
0
              add_attribute (context, context->partial_chunk);
1085
1086
0
              markup_string_free (context->partial_chunk, true);
1087
0
              context->partial_chunk = NULL;
1088
0
              context->start = NULL;
1089
1090
0
              if (*context->iter == '=')
1091
0
                {
1092
0
                  advance_char (context);
1093
0
                  context->state = STATE_AFTER_ATTRIBUTE_EQUALS_SIGN;
1094
0
                }
1095
0
              else
1096
0
                {
1097
0
                  char *error_text = xasprintf (_("missing '%c'"), '=');
1098
0
                  emit_error (context, error_text);
1099
0
                  free (error_text);
1100
0
                }
1101
0
            }
1102
0
          break;
1103
1104
0
        case STATE_BETWEEN_ATTRIBUTES:
1105
          /* Possible next states: AFTER_CLOSE_ANGLE,
1106
           * AFTER_ELISION_SLASH, INSIDE_ATTRIBUTE_NAME
1107
           */
1108
0
          skip_spaces (context);
1109
1110
0
          if (context->iter != context->current_text_end)
1111
0
            {
1112
0
              if (*context->iter == '/')
1113
0
                {
1114
0
                  advance_char (context);
1115
0
                  context->state = STATE_AFTER_ELISION_SLASH;
1116
0
                }
1117
0
              else if (*context->iter == '>')
1118
0
                {
1119
0
                  advance_char (context);
1120
0
                  context->state = STATE_AFTER_CLOSE_ANGLE;
1121
0
                }
1122
0
              else if (!IS_COMMON_NAME_END_CHAR (*(context->iter)))
1123
0
                {
1124
0
                  context->state = STATE_INSIDE_ATTRIBUTE_NAME;
1125
                  /* start of attribute name */
1126
0
                  context->start = context->iter;
1127
0
                }
1128
0
              else
1129
0
                {
1130
0
                  char *error_text = xasprintf (_("missing '%c' or '%c'"),
1131
0
                                                '>', '/');
1132
0
                  emit_error (context, error_text);
1133
0
                  free (error_text);
1134
0
                }
1135
1136
              /* If we're done with attributes, invoke
1137
               * the start_element callback
1138
               */
1139
0
              if (context->state == STATE_AFTER_ELISION_SLASH
1140
0
                  || context->state == STATE_AFTER_CLOSE_ANGLE)
1141
0
                emit_start_element (context);
1142
0
            }
1143
0
          break;
1144
1145
0
        case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN:
1146
          /* Possible next state: INSIDE_ATTRIBUTE_VALUE_[SQ/DQ] */
1147
1148
0
          skip_spaces (context);
1149
1150
0
          if (context->iter != context->current_text_end)
1151
0
            {
1152
0
              if (*context->iter == '"')
1153
0
                {
1154
0
                  advance_char (context);
1155
0
                  context->state = STATE_INSIDE_ATTRIBUTE_VALUE_DQ;
1156
0
                  context->start = context->iter;
1157
0
                }
1158
0
              else if (*context->iter == '\'')
1159
0
                {
1160
0
                  advance_char (context);
1161
0
                  context->state = STATE_INSIDE_ATTRIBUTE_VALUE_SQ;
1162
0
                  context->start = context->iter;
1163
0
                }
1164
0
              else
1165
0
                {
1166
0
                  char *error_text = xasprintf (_("missing '%c' or '%c'"),
1167
0
                                                '\'', '"');
1168
0
                  emit_error (context, error_text);
1169
0
                  free (error_text);
1170
0
                }
1171
0
            }
1172
0
          break;
1173
1174
0
        case STATE_INSIDE_ATTRIBUTE_VALUE_SQ:
1175
0
        case STATE_INSIDE_ATTRIBUTE_VALUE_DQ:
1176
          /* Possible next states: BETWEEN_ATTRIBUTES */
1177
0
          {
1178
0
            char delim;
1179
1180
0
            if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ)
1181
0
              {
1182
0
                delim = '\'';
1183
0
              }
1184
0
            else
1185
0
              {
1186
0
                delim = '"';
1187
0
              }
1188
1189
0
            do
1190
0
              {
1191
0
                if (*context->iter == delim)
1192
0
                  break;
1193
0
              }
1194
0
            while (advance_char (context));
1195
0
          }
1196
0
          if (context->iter == context->current_text_end)
1197
0
            {
1198
              /* The value hasn't necessarily ended. Merge with
1199
               * partial chunk, leave state unchanged.
1200
               */
1201
0
              add_to_partial (context, context->start, context->iter);
1202
0
            }
1203
0
          else
1204
0
            {
1205
0
              bool is_ascii;
1206
              /* The value has ended at the quote mark. Combine it
1207
               * with the partial chunk if any; set it for the current
1208
               * attribute.
1209
               */
1210
0
              add_to_partial (context, context->start, context->iter);
1211
1212
0
              assert (context->cur_attr >= 0);
1213
1214
0
              if (unescape_string_inplace (context, context->partial_chunk,
1215
0
                                           &is_ascii)
1216
0
                  && (is_ascii
1217
0
                      || text_validate (context,
1218
0
                                        context->partial_chunk->buffer,
1219
0
                                        context->partial_chunk->buflen)))
1220
0
                {
1221
                  /* success, advance past quote and set state. */
1222
0
                  context->attr_values[context->cur_attr] =
1223
0
                    markup_string_free (context->partial_chunk, false);
1224
0
                  context->partial_chunk = NULL;
1225
0
                  advance_char (context);
1226
0
                  context->state = STATE_BETWEEN_ATTRIBUTES;
1227
0
                  context->start = NULL;
1228
0
                }
1229
1230
0
              truncate_partial (context);
1231
0
            }
1232
0
          break;
1233
1234
0
        case STATE_INSIDE_TEXT:
1235
          /* Possible next states: AFTER_OPEN_ANGLE */
1236
0
          do
1237
0
            {
1238
0
              if (*context->iter == '<')
1239
0
                break;
1240
0
            }
1241
0
          while (advance_char (context));
1242
1243
          /* The text hasn't necessarily ended. Merge with
1244
           * partial chunk, leave state unchanged.
1245
           */
1246
1247
0
          add_to_partial (context, context->start, context->iter);
1248
1249
0
          if (context->iter != context->current_text_end)
1250
0
            {
1251
0
              bool is_ascii;
1252
1253
              /* The text has ended at the open angle. Call the text
1254
               * callback.
1255
               */
1256
0
              if (unescape_string_inplace (context, context->partial_chunk,
1257
0
                                           &is_ascii)
1258
0
                  && (is_ascii
1259
0
                      || text_validate (context,
1260
0
                                        context->partial_chunk->buffer,
1261
0
                                        context->partial_chunk->buflen)))
1262
0
                {
1263
0
                  if (context->parser->text)
1264
0
                    (*context->parser->text) (context,
1265
0
                                              context->partial_chunk->buffer,
1266
0
                                              context->partial_chunk->buflen,
1267
0
                                              context->user_data);
1268
1269
                  /* advance past open angle and set state. */
1270
0
                  advance_char (context);
1271
0
                  context->state = STATE_AFTER_OPEN_ANGLE;
1272
                  /* could begin a passthrough */
1273
0
                  context->start = context->iter;
1274
0
                }
1275
1276
0
              truncate_partial (context);
1277
0
            }
1278
0
          break;
1279
1280
0
        case STATE_AFTER_CLOSE_TAG_SLASH:
1281
          /* Possible next state: INSIDE_CLOSE_TAG_NAME */
1282
0
          if (!IS_COMMON_NAME_END_CHAR (*(context->iter)))
1283
0
            {
1284
0
              context->state = STATE_INSIDE_CLOSE_TAG_NAME;
1285
1286
              /* start of tag name */
1287
0
              context->start = context->iter;
1288
0
            }
1289
0
          else
1290
0
            {
1291
0
              char *error_text = xasprintf (_("invalid character after '%s'"),
1292
0
                                            "</");
1293
0
              emit_error (context, error_text);
1294
0
              free (error_text);
1295
0
            }
1296
0
          break;
1297
1298
0
        case STATE_INSIDE_CLOSE_TAG_NAME:
1299
          /* Possible next state: AFTER_CLOSE_TAG_NAME */
1300
0
          advance_to_name_end (context);
1301
0
          add_to_partial (context, context->start, context->iter);
1302
1303
0
          if (context->iter != context->current_text_end)
1304
0
            context->state = STATE_AFTER_CLOSE_TAG_NAME;
1305
0
          break;
1306
1307
0
        case STATE_AFTER_CLOSE_TAG_NAME:
1308
          /* Possible next state: AFTER_CLOSE_TAG_SLASH */
1309
1310
0
          skip_spaces (context);
1311
1312
0
          if (context->iter != context->current_text_end)
1313
0
            {
1314
0
              markup_string_ty *close_name;
1315
1316
0
              close_name = context->partial_chunk;
1317
0
              context->partial_chunk = NULL;
1318
1319
0
              if (*context->iter != '>')
1320
0
                {
1321
0
                  char *error_text =
1322
0
                    xasprintf (_("invalid character after '%s'"),
1323
0
                               _("a close element name"));
1324
0
                  emit_error (context, error_text);
1325
0
                  free (error_text);
1326
0
                }
1327
0
              else if (gl_list_size (context->tag_stack) == 0)
1328
0
                {
1329
0
                  emit_error (context, _("element is closed"));
1330
0
                }
1331
0
              else if (strcmp (close_name->buffer, current_element (context))
1332
0
                       != 0)
1333
0
                {
1334
0
                  emit_error (context, _("element is closed"));
1335
0
                }
1336
0
              else
1337
0
                {
1338
0
                  advance_char (context);
1339
0
                  context->state = STATE_AFTER_CLOSE_ANGLE;
1340
0
                  context->start = NULL;
1341
1342
0
                  emit_end_element (context);
1343
0
                }
1344
0
              context->partial_chunk = close_name;
1345
0
              truncate_partial (context);
1346
0
            }
1347
0
          break;
1348
1349
0
        case STATE_INSIDE_PASSTHROUGH:
1350
          /* Possible next state: AFTER_CLOSE_ANGLE */
1351
0
          do
1352
0
            {
1353
0
              if (*context->iter == '<')
1354
0
                context->balance++;
1355
0
              if (*context->iter == '>')
1356
0
                {
1357
0
                  char *str;
1358
0
                  size_t len;
1359
1360
0
                  context->balance--;
1361
0
                  add_to_partial (context, context->start, context->iter);
1362
0
                  context->start = context->iter;
1363
1364
0
                  str = context->partial_chunk->buffer;
1365
0
                  len = context->partial_chunk->buflen;
1366
1367
0
                  if (str[1] == '?' && str[len - 1] == '?')
1368
0
                    break;
1369
0
                  if (strncmp (str, "<!--", 4) == 0
1370
0
                      && strcmp (str + len - 2, "--") == 0)
1371
0
                    break;
1372
0
                  if (strncmp (str, "<![CDATA[", 9) == 0
1373
0
                      && strcmp (str + len - 2, "]]") == 0)
1374
0
                    break;
1375
0
                  if (strncmp (str, "<!DOCTYPE", 9) == 0
1376
0
                      && context->balance == 0)
1377
0
                    break;
1378
0
                }
1379
0
            }
1380
0
          while (advance_char (context));
1381
1382
0
          if (context->iter == context->current_text_end)
1383
0
            {
1384
              /* The passthrough hasn't necessarily ended. Merge with
1385
               * partial chunk, leave state unchanged.
1386
               */
1387
0
               add_to_partial (context, context->start, context->iter);
1388
0
            }
1389
0
          else
1390
0
            {
1391
              /* The passthrough has ended at the close angle. Combine
1392
               * it with the partial chunk if any. Call the passthrough
1393
               * callback. Note that the open/close angles are
1394
               * included in the text of the passthrough.
1395
               */
1396
0
              advance_char (context); /* advance past close angle */
1397
0
              add_to_partial (context, context->start, context->iter);
1398
1399
0
              if (context->flags & MARKUP_TREAT_CDATA_AS_TEXT
1400
0
                  && strncmp (context->partial_chunk->buffer, "<![CDATA[", 9) == 0)
1401
0
                {
1402
0
                  if (context->parser->text
1403
0
                      && text_validate (context,
1404
0
                                        context->partial_chunk->buffer + 9,
1405
0
                                        context->partial_chunk->buflen - 12))
1406
0
                    (*context->parser->text) (context,
1407
0
                                              context->partial_chunk->buffer + 9,
1408
0
                                              context->partial_chunk->buflen - 12,
1409
0
                                              context->user_data);
1410
0
                }
1411
0
              else if (context->parser->passthrough
1412
0
                       && text_validate (context,
1413
0
                                         context->partial_chunk->buffer,
1414
0
                                         context->partial_chunk->buflen))
1415
0
                (*context->parser->passthrough) (context,
1416
0
                                                 context->partial_chunk->buffer,
1417
0
                                                 context->partial_chunk->buflen,
1418
0
                                                 context->user_data);
1419
1420
0
              truncate_partial (context);
1421
1422
0
              context->state = STATE_AFTER_CLOSE_ANGLE;
1423
0
              context->start = context->iter; /* could begin text */
1424
0
            }
1425
0
          break;
1426
1427
0
        case STATE_ERROR:
1428
0
          goto finished;
1429
0
          break;
1430
1431
0
        default:
1432
0
          abort ();
1433
0
          break;
1434
0
        }
1435
0
    }
1436
1437
0
 finished:
1438
0
  context->parsing = false;
1439
1440
0
  return context->state != STATE_ERROR;
1441
0
}
1442
1443
/* Signals to the parse context that all data has been fed into the
1444
 * parse context with markup_parse_context_parse.
1445
 *
1446
 * This function reports an error if the document isn't complete,
1447
 * for example if elements are still open.  */
1448
bool
1449
markup_parse_context_end_parse (markup_parse_context_ty *context)
1450
0
{
1451
0
  const char *location = NULL;
1452
1453
0
  assert (context != NULL);
1454
0
  assert (!context->parsing);
1455
0
  assert (context->state != STATE_ERROR);
1456
1457
0
  if (context->partial_chunk != NULL)
1458
0
    {
1459
0
      markup_string_free (context->partial_chunk, true);
1460
0
      context->partial_chunk = NULL;
1461
0
    }
1462
1463
0
  if (context->document_empty)
1464
0
    {
1465
0
      emit_error (context, _("empty document"));
1466
0
      return false;
1467
0
    }
1468
1469
0
  context->parsing = true;
1470
1471
0
  switch (context->state)
1472
0
    {
1473
0
    case STATE_START:
1474
      /* Nothing to do */
1475
0
      break;
1476
1477
0
    case STATE_AFTER_OPEN_ANGLE:
1478
0
      location = _("after '<'");
1479
0
      break;
1480
1481
0
    case STATE_AFTER_CLOSE_ANGLE:
1482
0
      if (gl_list_size (context->tag_stack) > 0)
1483
0
        {
1484
          /* Error message the same as for INSIDE_TEXT */
1485
0
          location = _("elements still open");
1486
0
        }
1487
0
      break;
1488
1489
0
    case STATE_AFTER_ELISION_SLASH:
1490
0
      location = _("missing '>'");
1491
0
      break;
1492
1493
0
    case STATE_INSIDE_OPEN_TAG_NAME:
1494
0
      location = _("inside an element name");
1495
0
      break;
1496
1497
0
    case STATE_INSIDE_ATTRIBUTE_NAME:
1498
0
    case STATE_AFTER_ATTRIBUTE_NAME:
1499
0
      location = _("inside an attribute name");
1500
0
      break;
1501
1502
0
    case STATE_BETWEEN_ATTRIBUTES:
1503
0
      location = _("inside an open tag");
1504
0
      break;
1505
1506
0
    case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN:
1507
0
      location = _("after '='");
1508
0
      break;
1509
1510
0
    case STATE_INSIDE_ATTRIBUTE_VALUE_SQ:
1511
0
    case STATE_INSIDE_ATTRIBUTE_VALUE_DQ:
1512
0
      location = _("inside an attribute value");
1513
0
      break;
1514
1515
0
    case STATE_INSIDE_TEXT:
1516
0
      assert (gl_list_size (context->tag_stack) > 0);
1517
0
      location = _("elements still open");
1518
0
      break;
1519
1520
0
    case STATE_AFTER_CLOSE_TAG_SLASH:
1521
0
    case STATE_INSIDE_CLOSE_TAG_NAME:
1522
0
    case STATE_AFTER_CLOSE_TAG_NAME:
1523
0
      location = _("inside the close tag");
1524
0
      break;
1525
1526
0
    case STATE_INSIDE_PASSTHROUGH:
1527
0
      location = _("inside a comment or processing instruction");
1528
0
      break;
1529
1530
0
    case STATE_ERROR:
1531
0
    default:
1532
0
      abort ();
1533
0
      break;
1534
0
    }
1535
1536
0
  if (location != NULL)
1537
0
    {
1538
0
      char *error_text = xasprintf (_("document ended unexpectedly: %s"),
1539
0
                                    location);
1540
0
      emit_error (context, error_text);
1541
0
      free (error_text);
1542
0
    }
1543
1544
0
  context->parsing = false;
1545
1546
0
  return context->state != STATE_ERROR;
1547
0
}
1548
1549
const char *
1550
markup_parse_context_get_error (markup_parse_context_ty *context)
1551
0
{
1552
0
  return context->error_text;
1553
0
}