Coverage Report

Created: 2026-03-12 07:14

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/gettext/gettext-tools/libgettextpo/markup.c
Line
Count
Source
1
/* markup.c -- simple XML-like parser
2
   Copyright (C) 2015-2025 Free Software Foundation, Inc.
3
4
   This file is not part of the GNU gettext program, but is used with
5
   GNU gettext.
6
7
   This is a stripped down version of GLib's gmarkup.c.  The original
8
   copyright notice is as follows:
9
*/
10
11
/* gmarkup.c - Simple XML-like parser
12
 *
13
 *  Copyright 2000, 2003 Red Hat, Inc.
14
 *  Copyright 2007, 2008 Ryan Lortie <desrt@desrt.ca>
15
 *
16
 * GLib is free software; you can redistribute it and/or modify it
17
 * under the terms of the GNU General Public License as
18
 * published by the Free Software Foundation; either version 3 of the
19
 * License, or (at your option) any later version.
20
 *
21
 * GLib is distributed in the hope that it will be useful,
22
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
24
 * General Public License for more details.
25
 *
26
 * You should have received a copy of the GNU General Public
27
 * License along with GLib; see the file COPYING.LIB.  If not,
28
 * see <https://www.gnu.org/licenses/>.
29
 */
30
31
#include <config.h>
32
33
#include <assert.h>
34
#include <stdarg.h>
35
#include <string.h>
36
#include <stdio.h>
37
#include <stdlib.h>
38
39
/* Specification */
40
#include "markup.h"
41
42
#include "c-ctype.h"
43
#include "gettext.h"
44
#include "gl_linked_list.h"
45
#include "gl_xlist.h"
46
#include "unictype.h"
47
#include "unistr.h"
48
#include "xalloc.h"
49
#include "xvasprintf.h"
50
#include "xstrerror.h"
51
52
0
#define _(s) gettext(s)
53
54
/**
55
 * The "markup" parser is intended to parse a simple markup format
56
 * that's a subset of XML.  This is a small, efficient, easy-to-use
57
 * parser.  It should not be used if you expect to interoperate with
58
 * other applications generating full-scale XML.  However, it's very
59
 * useful for application data files, config files, etc. where you
60
 * know your application will be the only one writing the file.
61
 * Full-scale XML parsers should be able to parse the subset used by
62
 * markup, so you can easily migrate to full-scale XML at a later
63
 * time if the need arises.
64
 *
65
 * The parser is not guaranteed to signal an error on all invalid XML;
66
 * the parser may accept documents that an XML parser would not.
67
 * However, XML documents which are not well-formed (which is a weaker
68
 * condition than being valid.  See the XML specification
69
 * <https://www.w3.org/TR/REC-xml/> for definitions of these terms.)
70
 * are not considered valid GMarkup documents.
71
 *
72
 * Simplifications to XML include:
73
 *
74
 * - Only UTF-8 encoding is allowed
75
 *
76
 * - No user-defined entities
77
 *
78
 * - Processing instructions, comments and the doctype declaration
79
 *   are "passed through" but are not interpreted in any way
80
 *
81
 * - No DTD or validation
82
 *
83
 * The markup format does support:
84
 *
85
 * - Elements
86
 *
87
 * - Attributes
88
 *
89
 * - 5 standard entities: &amp; &lt; &gt; &quot; &apos;
90
 *
91
 * - Character references
92
 *
93
 * - Sections marked as CDATA
94
 */
95
96
/* States of the parser's state machine.  The current state is kept in the
   'state' field of the parse context; a new context starts in STATE_START
   and emit_error() moves it to STATE_ERROR.  */
typedef enum
97
{
98
  STATE_START,
99
  STATE_AFTER_OPEN_ANGLE,
100
  STATE_AFTER_CLOSE_ANGLE,
101
  STATE_AFTER_ELISION_SLASH, /* the slash that obviates need for end element */
102
  STATE_INSIDE_OPEN_TAG_NAME,
103
  STATE_INSIDE_ATTRIBUTE_NAME,
104
  STATE_AFTER_ATTRIBUTE_NAME,
105
  STATE_BETWEEN_ATTRIBUTES,
106
  STATE_AFTER_ATTRIBUTE_EQUALS_SIGN,
107
  STATE_INSIDE_ATTRIBUTE_VALUE_SQ,
108
  STATE_INSIDE_ATTRIBUTE_VALUE_DQ,
109
  STATE_INSIDE_TEXT,
110
  STATE_AFTER_CLOSE_TAG_SLASH,
111
  STATE_INSIDE_CLOSE_TAG_NAME,
112
  STATE_AFTER_CLOSE_TAG_NAME,
113
  STATE_INSIDE_PASSTHROUGH,
114
  STATE_ERROR
115
} markup_parse_state_ty;
116
117
/* One saved level of subparser state.  markup_parse_context_push() copies
   the context's subparser_element, parser and user_data into a tracker
   before installing the new parser; pop_subparser_stack() copies them
   back.  */
typedef struct
118
{
119
  const char *prev_element;
120
  const markup_parser_ty *prev_parser;
121
  void *prev_user_data;
122
} markup_recursion_tracker_ty;
123
124
/* A growable, NUL-terminated byte string.
   buffer: heap storage; NULL for a freshly created string, since
           markup_string_new() zero-initializes the object.
   bufmax: number of bytes allocated for buffer.
   buflen: length of the stored text, not counting the terminating NUL.  */
typedef struct
125
{
126
  char *buffer;
127
  size_t bufmax;
128
  size_t buflen;
129
} markup_string_ty;
130
131
/* Complete state of one parser instance.  Allocated and initialized by
   markup_parse_context_new(), released by markup_parse_context_free().  */
struct _markup_parse_context_ty
132
{
133
  const markup_parser_ty *parser;
134
135
  markup_parse_flags_ty flags;
136
137
  int line_number;
138
  int char_number;
139
140
  markup_parse_state_ty state;
141
142
  void *user_data;
143
144
  /* A piece of character data or an element that
145
   * hasn't "ended" yet so we haven't yet called
146
   * the callback for it.
147
   */
148
  markup_string_ty *partial_chunk;
149
150
  gl_list_t tag_stack;          /* <markup_string_ty *>, innermost element first */
151
152
  char **attr_names;
153
  char **attr_values;
154
  int cur_attr;
155
  int alloc_attrs;
156
157
  const char *current_text;
158
  ssize_t current_text_len;
159
  const char *current_text_end;
160
161
  /* used to save the start of the last interesting thingy */
162
  const char *start;
163
164
  const char *iter;
165
166
  char *error_text;
167
168
  unsigned int document_empty : 1;
169
  unsigned int parsing : 1;
170
  unsigned int awaiting_pop : 1;
171
  int balance;
172
173
  /* subparser support */
174
  gl_list_t subparser_stack;    /* <markup_recursion_tracker_ty *> */
175
  const char *subparser_element;
176
};
177
178
static markup_string_ty *
179
markup_string_new (void)
180
0
{
181
0
  return XZALLOC (markup_string_ty);
182
0
}
183
184
static char *
185
markup_string_free (markup_string_ty *string, bool free_segment)
186
0
{
187
0
  if (free_segment)
188
0
    {
189
0
      free (string->buffer);
190
0
      free (string);
191
0
      return NULL;
192
0
    }
193
0
  else
194
0
    {
195
0
      char *result = string->buffer;
196
0
      free (string);
197
0
      return result;
198
0
    }
199
0
}
200
201
static void
202
markup_string_free1 (markup_string_ty *string)
203
0
{
204
0
  markup_string_free (string, true);
205
0
}
206
207
static void
208
markup_string_truncate (markup_string_ty *string, size_t length)
209
0
{
210
0
  assert (string && length < string->buflen - 1);
211
0
  string->buffer[length] = '\0';
212
0
  string->buflen = length;
213
0
}
214
215
static void
216
markup_string_append (markup_string_ty *string, const char *to_append,
217
                      size_t length)
218
0
{
219
0
  if (string->buflen + length + 1 > string->bufmax)
220
0
    {
221
0
      string->bufmax *= 2;
222
0
      if (string->buflen + length + 1 > string->bufmax)
223
0
        string->bufmax = string->buflen + length + 1;
224
0
      string->buffer = xrealloc (string->buffer, string->bufmax);
225
0
    }
226
0
  memcpy (string->buffer + string->buflen, to_append, length);
227
0
  string->buffer[length] = '\0';
228
0
  string->buflen = length;
229
0
}
230
231
static inline void
232
string_blank (markup_string_ty *string)
233
0
{
234
0
  if (string->bufmax > 0)
235
0
    {
236
0
      *string->buffer = '\0';
237
0
      string->buflen = 0;
238
0
    }
239
0
}
240
241
/* Creates a new parse context.  A parse context is used to parse
242
   marked-up documents.  You can feed any number of documents into a
243
   context, as long as no errors occur; once an error occurs, the
244
   parse context can't continue to parse text (you have to free it and
245
   create a new parse context).  */
246
markup_parse_context_ty *
247
markup_parse_context_new (const markup_parser_ty *parser,
248
                          markup_parse_flags_ty flags,
249
                          void *user_data)
250
0
{
251
0
  assert (parser != NULL);
252
253
0
  markup_parse_context_ty *context = XMALLOC (markup_parse_context_ty);
254
255
0
  context->parser = parser;
256
0
  context->flags = flags;
257
0
  context->user_data = user_data;
258
259
0
  context->line_number = 1;
260
0
  context->char_number = 1;
261
262
0
  context->partial_chunk = NULL;
263
264
0
  context->state = STATE_START;
265
0
  context->tag_stack =
266
0
    gl_list_create_empty (GL_LINKED_LIST,
267
0
                          NULL, NULL,
268
0
                          (gl_listelement_dispose_fn) markup_string_free1,
269
0
                          true);
270
0
  context->attr_names = NULL;
271
0
  context->attr_values = NULL;
272
0
  context->cur_attr = -1;
273
0
  context->alloc_attrs = 0;
274
275
0
  context->current_text = NULL;
276
0
  context->current_text_len = -1;
277
0
  context->current_text_end = NULL;
278
279
0
  context->start = NULL;
280
0
  context->iter = NULL;
281
282
0
  context->error_text = NULL;
283
284
0
  context->document_empty = true;
285
0
  context->parsing = false;
286
287
0
  context->awaiting_pop = false;
288
0
  context->subparser_stack =
289
0
    gl_list_create_empty (GL_LINKED_LIST,
290
0
                          NULL, NULL,
291
0
                          (gl_listelement_dispose_fn) free,
292
0
                          true);
293
0
  context->subparser_element = NULL;
294
295
0
  context->balance = 0;
296
297
0
  return context;
298
0
}
299
300
static void clear_attributes (markup_parse_context_ty *context);
301
302
/* Frees a parse context.  This function can't be called from inside
303
   one of the markup_parser_ty functions or while a subparser is
304
   pushed.  */
305
void
306
markup_parse_context_free (markup_parse_context_ty *context)
307
0
{
308
0
  assert (context != NULL);
309
0
  assert (!context->parsing);
310
0
  assert (gl_list_size (context->subparser_stack) == 0);
311
0
  assert (!context->awaiting_pop);
312
313
0
  clear_attributes (context);
314
0
  free (context->attr_names);
315
0
  free (context->attr_values);
316
317
0
  gl_list_free (context->tag_stack);
318
0
  gl_list_free (context->subparser_stack);
319
320
0
  if (context->partial_chunk)
321
0
    markup_string_free (context->partial_chunk, true);
322
323
0
  free (context->error_text);
324
325
0
  free (context);
326
0
}
327
328
static void pop_subparser_stack (markup_parse_context_ty *context);
329
330
static void
331
emit_error (markup_parse_context_ty *context, const char *error_text)
332
0
{
333
0
  context->state = STATE_ERROR;
334
335
0
  if (context->parser->error)
336
0
    (*context->parser->error) (context, error_text, context->user_data);
337
338
  /* report the error all the way up to free all the user-data */
339
0
  while (gl_list_size (context->subparser_stack) > 0)
340
0
    {
341
0
      pop_subparser_stack (context);
342
0
      context->awaiting_pop = false; /* already been freed */
343
344
0
      if (context->parser->error)
345
0
        (*context->parser->error) (context, error_text, context->user_data);
346
0
    }
347
348
0
  if (context->error_text)
349
0
    free (context->error_text);
350
0
  context->error_text = xstrdup (error_text);
351
0
}
352
353
/* True if C is one of the four ASCII characters '=', '/', '>' or ' '.
   The name validators reject these inside a name, and
   advance_to_name_end() stops scanning at them.
   Note: C is evaluated several times; pass an expression without side
   effects.  */
#define IS_COMMON_NAME_END_CHAR(c) \
354
0
  ((c) == '=' || (c) == '/' || (c) == '>' || (c) == ' ')
355
356
static bool
357
slow_name_validate (markup_parse_context_ty *context, const char *name)
358
0
{
359
0
  if (u8_check ((const uint8_t *) name, strlen (name)) != NULL)
360
0
    {
361
0
      emit_error (context, _("invalid UTF-8 sequence"));
362
0
      return false;
363
0
    }
364
365
0
  ucs4_t uc;
366
367
0
  {
368
0
    const char *p = name;
369
370
0
    if (!(c_isalpha (*p)
371
0
          || (!IS_COMMON_NAME_END_CHAR (*p)
372
0
              && (*p == '_'
373
0
                  || *p == ':'
374
0
                  || (u8_mbtouc (&uc, (const uint8_t *) name, strlen (name)) > 0
375
0
                      && uc_is_alpha (uc))))))
376
0
      {
377
0
        char *error_text = xasprintf (_("'%s' is not a valid name: %c"),
378
0
                                      name, *p);
379
0
        emit_error (context, error_text);
380
0
        free (error_text);
381
0
        return false;
382
0
      }
383
0
  }
384
385
0
  for (const char *p = (const char *) u8_next (&uc, (const uint8_t *) name);
386
0
       p != NULL;
387
0
       p = (const char *) u8_next (&uc, (const uint8_t *) p))
388
0
    {
389
      /* is_name_char */
390
0
      if (!(c_isalnum (*p)
391
0
            || (!IS_COMMON_NAME_END_CHAR (*p)
392
0
                && (*p == '.' || *p == '-' || *p == '_' || *p == ':'
393
0
                    || uc_is_alpha (uc)))))
394
0
        {
395
0
          char *error_text = xasprintf (_("'%s' is not a valid name: '%c'"),
396
0
                                        name, *p);
397
0
          emit_error (context, error_text);
398
0
          free (error_text);
399
0
          return false;
400
0
        }
401
0
    }
402
403
0
  return true;
404
0
}
405
406
/*
407
 * Use me for elements, attributes etc.
408
 */
409
static bool
410
name_validate (markup_parse_context_ty *context, const char *name)
411
0
{
412
  /* name start char */
413
0
  const char *p = name;
414
0
  if (IS_COMMON_NAME_END_CHAR (*p)
415
0
      || !(c_isalpha (*p) || *p == '_' || *p == ':'))
416
0
    goto slow_validate;
417
418
0
  {
419
0
    char mask;
420
421
0
    for (mask = *p++; *p != '\0'; p++)
422
0
      {
423
0
        mask |= *p;
424
425
        /* is_name_char */
426
0
        if (!(c_isalnum (*p)
427
0
              || (!IS_COMMON_NAME_END_CHAR (*p)
428
0
                  && (*p == '.' || *p == '-' || *p == '_' || *p == ':'))))
429
0
          goto slow_validate;
430
0
      }
431
432
0
    if (mask & 0x80) /* un-common / non-ascii */
433
0
      goto slow_validate;
434
0
  }
435
436
0
  return true;
437
438
0
 slow_validate:
439
0
  return slow_name_validate (context, name);
440
0
}
441
442
static bool
443
text_validate (markup_parse_context_ty *context,
444
               const char *p,
445
               int len)
446
0
{
447
0
  if (u8_check ((const uint8_t *) p, len) != NULL)
448
0
    {
449
0
      emit_error (context, _("invalid UTF-8 sequence"));
450
0
      return false;
451
0
    }
452
0
  else
453
0
    return true;
454
0
}
455
456
/*
457
 * re-write the GString in-place, unescaping anything that escaped.
458
 * most XML does not contain entities, or escaping.
459
 */
460
static bool
461
unescape_string_inplace (markup_parse_context_ty *context,
462
                         markup_string_ty *string,
463
                         bool *is_ascii)
464
0
{
465
0
  if (string->buflen == 0)
466
0
    return true;
467
468
0
  *is_ascii = false;
469
470
  /* are we unescaping an attribute or not ? */
471
0
  bool normalize_attribute;
472
0
  if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ
473
0
      || context->state == STATE_INSIDE_ATTRIBUTE_VALUE_DQ)
474
0
    normalize_attribute = true;
475
0
  else
476
0
    normalize_attribute = false;
477
478
  /*
479
   * Meeks' theorem: unescaping can only shrink text.
480
   * for &lt; etc. this is obvious, for &#xffff; more
481
   * thought is required, but this is patently so.
482
   */
483
0
  char mask = 0;
484
0
  const char *from;
485
0
  char *to;
486
0
  for (from = to = string->buffer; *from != '\0'; from++, to++)
487
0
    {
488
0
      *to = *from;
489
490
0
      mask |= *to;
491
0
      if (normalize_attribute && (*to == '\t' || *to == '\n'))
492
0
        *to = ' ';
493
0
      if (*to == '\r')
494
0
        {
495
0
          *to = normalize_attribute ? ' ' : '\n';
496
0
          if (from[1] == '\n')
497
0
            from++;
498
0
        }
499
0
      if (*from == '&')
500
0
        {
501
0
          from++;
502
0
          if (*from == '#')
503
0
            {
504
0
              from++;
505
506
0
              int base = 10;
507
0
              if (*from == 'x')
508
0
                {
509
0
                  base = 16;
510
0
                  from++;
511
0
                }
512
513
0
              unsigned long l;
514
0
              char *end = NULL;
515
0
              if (!(base == 16 ? c_isxdigit (*from) : c_isdigit (*from))
516
0
                  || /* No need to reset and test errno here, because in case
517
                        of overflow, l will be == ULONG_MAX, which is
518
                        > 0x10FFFF.  */
519
0
                     (l = strtoul (from, &end, base),
520
0
                      end == from))
521
0
                {
522
0
                  char *error_text =
523
0
                    xasprintf (_("invalid character reference: %s"),
524
0
                               _("not a valid number specification"));
525
0
                  emit_error (context, error_text);
526
0
                  free (error_text);
527
0
                  return false;
528
0
                }
529
0
              else if (*end != ';')
530
0
                {
531
0
                  char *error_text =
532
0
                    xasprintf (_("invalid character reference: %s"),
533
0
                               _("no ending ';'"));
534
0
                  emit_error (context, error_text);
535
0
                  free (error_text);
536
0
                  return false;
537
0
                }
538
0
              else if (/* characters XML 1.1 permits */
539
0
                       (0 < l && l <= 0xD7FF)
540
0
                       || (0xE000 <= l && l <= 0xFFFD) || (0x10000 <= l && l <= 0x10FFFF))
541
0
                {
542
0
                  char buf[8];
543
0
                  int length = u8_uctomb ((uint8_t *) buf, l, 8);
544
0
                  memcpy (to, buf, length);
545
0
                  to += length - 1;
546
0
                  from = end;
547
0
                  if (l >= 0x80) /* not ASCII */
548
0
                    mask |= 0x80;
549
0
                }
550
0
              else
551
0
                {
552
0
                  char *error_text =
553
0
                    xasprintf (_("invalid character reference: %s"),
554
0
                               _("non-permitted character"));
555
0
                  emit_error (context, error_text);
556
0
                  free (error_text);
557
0
                  return false;
558
0
                }
559
0
            }
560
561
0
          else if (strncmp (from, "lt;", 3) == 0)
562
0
            {
563
0
              *to = '<';
564
0
              from += 2;
565
0
            }
566
0
          else if (strncmp (from, "gt;", 3) == 0)
567
0
            {
568
0
              *to = '>';
569
0
              from += 2;
570
0
            }
571
0
          else if (strncmp (from, "amp;", 4) == 0)
572
0
            {
573
0
              *to = '&';
574
0
              from += 3;
575
0
            }
576
0
          else if (strncmp (from, "quot;", 5) == 0)
577
0
            {
578
0
              *to = '"';
579
0
              from += 4;
580
0
            }
581
0
          else if (strncmp (from, "apos;", 5) == 0)
582
0
            {
583
0
              *to = '\'';
584
0
              from += 4;
585
0
            }
586
0
          else
587
0
            {
588
0
              const char *reason;
589
0
              if (*from == ';')
590
0
                reason = _("empty");
591
0
              else
592
0
                {
593
0
                  const char *end = strchr (from, ';');
594
0
                  if (end)
595
0
                    reason = _("unknown");
596
0
                  else
597
0
                    reason = _("no ending ';'");
598
0
                }
599
600
0
              char *error_text = xasprintf (_("invalid entity reference: %s"),
601
0
                                            reason);
602
0
              emit_error (context, error_text);
603
0
              free (error_text);
604
0
              return false;
605
0
            }
606
0
        }
607
0
    }
608
609
0
  assert (to - string->buffer <= string->buflen);
610
0
  if (to - string->buffer != string->buflen)
611
0
    markup_string_truncate (string, to - string->buffer);
612
613
0
  *is_ascii = !(mask & 0x80);
614
615
0
  return true;
616
0
}
617
618
static inline bool
619
advance_char (markup_parse_context_ty *context)
620
0
{
621
0
  context->iter++;
622
0
  context->char_number++;
623
624
0
  if (context->iter == context->current_text_end)
625
0
    return false;
626
627
0
  if (*context->iter == '\n')
628
0
    {
629
0
      context->line_number++;
630
0
      context->char_number = 1;
631
0
    }
632
633
0
  return true;
634
0
}
635
636
/* Returns true if C is one of the four characters treated as white space
   here: space, tab, newline, carriage return.  */
static inline bool
xml_isspace (char c)
{
  switch (c)
    {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
      return true;
    default:
      return false;
    }
}
641
642
static void
643
skip_spaces (markup_parse_context_ty *context)
644
0
{
645
0
  do
646
0
    {
647
0
      if (!xml_isspace (*context->iter))
648
0
        return;
649
0
    }
650
0
  while (advance_char (context));
651
0
}
652
653
static void
654
advance_to_name_end (markup_parse_context_ty *context)
655
0
{
656
0
  do
657
0
    {
658
0
      if (IS_COMMON_NAME_END_CHAR (*(context->iter)))
659
0
        return;
660
0
      if (xml_isspace (*(context->iter)))
661
0
        return;
662
0
    }
663
0
  while (advance_char (context));
664
0
}
665
666
static void
667
add_to_partial (markup_parse_context_ty *context,
668
                const char         *text_start,
669
                const char         *text_end)
670
0
{
671
0
  if (context->partial_chunk == NULL)
672
0
    { /* allocate a new chunk to parse into */
673
674
0
      context->partial_chunk = markup_string_new ();
675
0
    }
676
677
0
  if (text_start != text_end)
678
0
    markup_string_append (context->partial_chunk,
679
0
                          text_start, text_end - text_start);
680
0
}
681
682
static inline void
683
truncate_partial (markup_parse_context_ty *context)
684
0
{
685
0
  if (context->partial_chunk != NULL)
686
0
    string_blank (context->partial_chunk);
687
0
}
688
689
static inline const char*
690
current_element (markup_parse_context_ty *context)
691
0
{
692
0
  const markup_string_ty *string = gl_list_get_at (context->tag_stack, 0);
693
0
  return string->buffer;
694
0
}
695
696
static void
697
pop_subparser_stack (markup_parse_context_ty *context)
698
0
{
699
0
  markup_recursion_tracker_ty *tracker;
700
701
0
  assert (gl_list_size (context->subparser_stack) > 0);
702
703
0
  tracker = (markup_recursion_tracker_ty *) gl_list_get_at (context->subparser_stack, 0);
704
705
0
  context->awaiting_pop = true;
706
707
0
  context->user_data = tracker->prev_user_data;
708
0
  context->parser = tracker->prev_parser;
709
0
  context->subparser_element = tracker->prev_element;
710
0
  free (tracker);
711
712
0
  gl_list_remove_at (context->subparser_stack, 0);
713
0
}
714
715
static void
716
push_partial_as_tag (markup_parse_context_ty *context)
717
0
{
718
0
  gl_list_add_first (context->tag_stack, context->partial_chunk);
719
0
  context->partial_chunk = NULL;
720
0
}
721
722
static void
723
pop_tag (markup_parse_context_ty *context)
724
0
{
725
0
  gl_list_remove_at (context->tag_stack, 0);
726
0
}
727
728
static void
729
possibly_finish_subparser (markup_parse_context_ty *context)
730
0
{
731
0
  if (current_element (context) == context->subparser_element)
732
0
    pop_subparser_stack (context);
733
0
}
734
735
static void
736
ensure_no_outstanding_subparser (markup_parse_context_ty *context)
737
0
{
738
0
  context->awaiting_pop = false;
739
0
}
740
741
static void
742
add_attribute (markup_parse_context_ty *context, markup_string_ty *string)
743
0
{
744
0
  if (context->cur_attr + 2 >= context->alloc_attrs)
745
0
    {
746
0
      context->alloc_attrs += 5; /* silly magic number */
747
0
      context->attr_names = xrealloc (context->attr_names, sizeof (char *) * context->alloc_attrs);
748
0
      context->attr_values = xrealloc (context->attr_values, sizeof(char *) * context->alloc_attrs);
749
0
    }
750
0
  context->cur_attr++;
751
0
  context->attr_names[context->cur_attr] = xstrdup (string->buffer);
752
0
  context->attr_values[context->cur_attr] = NULL;
753
0
  context->attr_names[context->cur_attr+1] = NULL;
754
0
  context->attr_values[context->cur_attr+1] = NULL;
755
0
}
756
757
static void
758
clear_attributes (markup_parse_context_ty *context)
759
0
{
760
  /* Go ahead and free the attributes. */
761
0
  for (; context->cur_attr >= 0; context->cur_attr--)
762
0
    {
763
0
      int pos = context->cur_attr;
764
0
      free (context->attr_names[pos]);
765
0
      free (context->attr_values[pos]);
766
0
      context->attr_names[pos] = context->attr_values[pos] = NULL;
767
0
    }
768
0
  assert (context->cur_attr == -1);
769
0
  assert (context->attr_names == NULL || context->attr_names[0] == NULL);
770
0
  assert (context->attr_values == NULL || context->attr_values[0] == NULL);
771
0
}
772
773
static void
774
markup_parse_context_push (markup_parse_context_ty *context,
775
                           const markup_parser_ty *parser,
776
                           void *user_data)
777
0
{
778
0
  markup_recursion_tracker_ty *tracker = XMALLOC (markup_recursion_tracker_ty);
779
0
  tracker->prev_element = context->subparser_element;
780
0
  tracker->prev_parser = context->parser;
781
0
  tracker->prev_user_data = context->user_data;
782
783
0
  context->subparser_element = current_element (context);
784
0
  context->parser = parser;
785
0
  context->user_data = user_data;
786
787
0
  gl_list_add_first (context->subparser_stack, tracker);
788
0
}
789
790
static void
791
markup_parse_context_pop (markup_parse_context_ty *context)
792
0
{
793
0
  if (!context->awaiting_pop)
794
0
    possibly_finish_subparser (context);
795
796
0
  assert (context->awaiting_pop);
797
798
0
  context->awaiting_pop = false;
799
0
}
800
801
/* This has to be a separate function to ensure the alloca's
802
 * are unwound on exit - otherwise we grow & blow the stack
803
 * with large documents
804
 */
805
static inline void
806
emit_start_element (markup_parse_context_ty *context)
807
0
{
808
  /* In case we want to ignore qualified tags and we see that we have
809
   * one here, we push a subparser.  This will ignore all tags inside of
810
   * the qualified tag.
811
   *
812
   * We deal with the end of the subparser from emit_end_element.
813
   */
814
0
  if ((context->flags & MARKUP_IGNORE_QUALIFIED)
815
0
      && strchr (current_element (context), ':'))
816
0
    {
817
0
      static const markup_parser_ty ignore_parser;
818
0
      markup_parse_context_push (context, &ignore_parser, NULL);
819
0
      clear_attributes (context);
820
0
      return;
821
0
    }
822
823
0
  const char **attr_names = XCALLOC (context->cur_attr + 2, const char *);
824
0
  const char **attr_values = XCALLOC (context->cur_attr + 2, const char *);
825
0
  {
826
0
    int j = 0;
827
0
    for (int i = 0; i < context->cur_attr + 1; i++)
828
0
      {
829
        /* Possibly omit qualified attribute names from the list */
830
0
        if (!((context->flags & MARKUP_IGNORE_QUALIFIED)
831
0
              && strchr (context->attr_names[i], ':')))
832
0
          {
833
0
            attr_names[j] = context->attr_names[i];
834
0
            attr_values[j] = context->attr_values[i];
835
0
            j++;
836
0
          }
837
0
      }
838
0
    attr_names[j] = NULL;
839
0
    attr_values[j] = NULL;
840
0
  }
841
842
  /* Call user callback for element start */
843
0
  const char *start_name = current_element (context);
844
845
0
  if (context->parser->start_element && name_validate (context, start_name))
846
0
    (* context->parser->start_element) (context,
847
0
                                        start_name,
848
0
                                        (const char **)attr_names,
849
0
                                        (const char **)attr_values,
850
0
                                        context->user_data);
851
0
  free (attr_names);
852
0
  free (attr_values);
853
0
  clear_attributes (context);
854
0
}
855
856
static void
857
emit_end_element (markup_parse_context_ty *context)
858
0
{
859
0
  assert (gl_list_size (context->tag_stack) != 0);
860
861
0
  possibly_finish_subparser (context);
862
863
  /* We might have just returned from our ignore subparser */
864
0
  if ((context->flags & MARKUP_IGNORE_QUALIFIED)
865
0
      && strchr (current_element (context), ':'))
866
0
    {
867
0
      markup_parse_context_pop (context);
868
0
      pop_tag (context);
869
0
      return;
870
0
    }
871
872
0
  if (context->parser->end_element)
873
0
    (* context->parser->end_element) (context,
874
0
                                      current_element (context),
875
0
                                      context->user_data);
876
877
0
  ensure_no_outstanding_subparser (context);
878
879
0
  pop_tag (context);
880
0
}
881
882
/* Feed some data to the parse context.  The data need not be valid
883
   UTF-8; an error will be signaled if it's invalid.  The data need
884
   not be an entire document; you can feed a document into the parser
885
   incrementally, via multiple calls to this function.  Typically, as
886
   you receive data from a network connection or file, you feed each
887
   received chunk of data into this function, aborting the process if
888
   an error occurs. Once an error is reported, no further data may be
889
   fed to the parse context; all errors are fatal.

   TEXT is the chunk to parse and TEXT_LEN its length in bytes; a
   negative TEXT_LEN means that TEXT is NUL-terminated and its length
   is taken with strlen.  An empty chunk is accepted and ignored.

   Returns true if the context is not in the error state after the
   chunk has been processed, false otherwise.  */
890
bool
891
markup_parse_context_parse (markup_parse_context_ty *context,
892
                            const char *text,
893
                            ssize_t text_len)
894
0
{
895
0
  assert (context != NULL);
896
0
  assert (text != NULL);
897
0
  assert (context->state != STATE_ERROR);
898
0
  assert (!context->parsing);
899
  /* A negative length means that TEXT is NUL-terminated.  */
900
0
  if (text_len < 0)
901
0
    text_len = strlen (text);
902
903
0
  if (text_len == 0)
904
0
    return true;
905
906
0
  context->parsing = true;
907
908
909
0
  context->current_text = text;
910
0
  context->current_text_len = text_len;
911
0
  context->current_text_end = context->current_text + text_len;
912
0
  context->iter = context->current_text;
913
0
  context->start = context->iter;
914
  /* Run the state machine until the chunk is exhausted.  The state is
   * kept in the context, so a construct that is cut off by the end of the
   * chunk is resumed by the next call.
   */
915
0
  while (context->iter != context->current_text_end)
916
0
    {
917
0
      switch (context->state)
918
0
        {
919
0
        case STATE_START:
920
          /* Possible next state: AFTER_OPEN_ANGLE */
921
922
0
          assert (gl_list_size (context->tag_stack) == 0);
923
924
          /* whitespace is ignored outside of any elements */
925
0
          skip_spaces (context);
926
927
0
          if (context->iter != context->current_text_end)
928
0
            {
929
0
              if (*context->iter == '<')
930
0
                {
931
                  /* Move after the open angle */
932
0
                  advance_char (context);
933
934
0
                  context->state = STATE_AFTER_OPEN_ANGLE;
935
936
                  /* this could start a passthrough */
937
0
                  context->start = context->iter;
938
939
                  /* document is now non-empty */
940
0
                  context->document_empty = false;
941
0
                }
942
0
              else
943
0
                {
944
0
                  emit_error (context,
945
0
                              _("document must begin with an element"));
946
0
                }
947
0
            }
948
0
          break;
949
950
0
        case STATE_AFTER_OPEN_ANGLE:
951
          /* Possible next states: INSIDE_OPEN_TAG_NAME,
952
           *  AFTER_CLOSE_TAG_SLASH, INSIDE_PASSTHROUGH
953
           */
954
0
          if (*context->iter == '?' || *context->iter == '!')
955
0
            {
956
              /* include < in the passthrough */
957
0
              const char *openangle = "<";
958
0
              add_to_partial (context, openangle, openangle + 1);
959
0
              context->start = context->iter;
960
0
              context->balance = 1;
961
0
              context->state = STATE_INSIDE_PASSTHROUGH;
962
0
            }
963
0
          else if (*context->iter == '/')
964
0
            {
965
              /* move after it */
966
0
              advance_char (context);
967
968
0
              context->state = STATE_AFTER_CLOSE_TAG_SLASH;
969
0
            }
970
0
          else if (!IS_COMMON_NAME_END_CHAR (*(context->iter)))
971
0
            {
972
0
              context->state = STATE_INSIDE_OPEN_TAG_NAME;
973
974
              /* start of tag name */
975
0
              context->start = context->iter;
976
0
            }
977
0
          else
978
0
            {
979
0
              char *error_text = xasprintf (_("invalid character after '%s'"),
980
0
                                            "<");
981
0
              emit_error (context, error_text);
982
0
              free (error_text);
983
0
            }
984
0
          break;
985
986
          /* The AFTER_CLOSE_ANGLE state is actually sort of
987
           * broken, because it doesn't correspond to a range
988
           * of characters in the input stream as the others do,
989
           * and thus makes things harder to conceptualize
990
           */
991
0
        case STATE_AFTER_CLOSE_ANGLE:
992
          /* Possible next states: INSIDE_TEXT, STATE_START */
993
0
          if (gl_list_size (context->tag_stack) == 0)
994
0
            {
995
0
              context->start = NULL;
996
0
              context->state = STATE_START;
997
0
            }
998
0
          else
999
0
            {
1000
0
              context->start = context->iter;
1001
0
              context->state = STATE_INSIDE_TEXT;
1002
0
            }
1003
0
          break;
1004
1005
0
        case STATE_AFTER_ELISION_SLASH:
1006
          /* Possible next state: AFTER_CLOSE_ANGLE */
1007
0
          if (*context->iter == '>')
1008
0
            {
1009
              /* move after the close angle */
1010
0
              advance_char (context);
1011
0
              context->state = STATE_AFTER_CLOSE_ANGLE;
1012
0
              emit_end_element (context);
1013
0
            }
1014
0
          else
1015
0
            {
1016
0
              char *error_text = xasprintf (_("missing '%c'"), '>');
1017
0
              emit_error (context, error_text);
1018
0
              free (error_text);
1019
0
            }
1020
0
          break;
1021
1022
0
        case STATE_INSIDE_OPEN_TAG_NAME:
1023
          /* Possible next states: BETWEEN_ATTRIBUTES */
1024
1025
          /* if there's a partial chunk then it's the first part of the
1026
           * tag name. If there's a context->start then it's the start
1027
           * of the tag name in current_text, the partial chunk goes
1028
           * before that start though.
1029
           */
1030
0
          advance_to_name_end (context);
1031
1032
0
          if (context->iter == context->current_text_end)
1033
0
            {
1034
              /* The name hasn't necessarily ended. Merge with
1035
               * partial chunk, leave state unchanged.
1036
               */
1037
0
              add_to_partial (context, context->start, context->iter);
1038
0
            }
1039
0
          else
1040
0
            {
1041
              /* The name has ended. Combine it with the partial chunk
1042
               * if any; push it on the stack; enter next state.
1043
               */
1044
0
              add_to_partial (context, context->start, context->iter);
1045
0
              push_partial_as_tag (context);
1046
1047
0
              context->state = STATE_BETWEEN_ATTRIBUTES;
1048
0
              context->start = NULL;
1049
0
            }
1050
0
          break;
1051
1052
0
        case STATE_INSIDE_ATTRIBUTE_NAME:
1053
          /* Possible next states: AFTER_ATTRIBUTE_NAME */
1054
1055
0
          advance_to_name_end (context);
1056
0
          add_to_partial (context, context->start, context->iter);
1057
1058
          /* read the full name, if we enter the equals sign state
1059
           * then add the attribute to the list (without the value),
1060
           * otherwise store a partial chunk to be prepended later.
1061
           */
1062
0
          if (context->iter != context->current_text_end)
1063
0
            context->state = STATE_AFTER_ATTRIBUTE_NAME;
1064
0
          break;
1065
1066
0
        case STATE_AFTER_ATTRIBUTE_NAME:
1067
          /* Possible next states: AFTER_ATTRIBUTE_EQUALS_SIGN */
1068
1069
0
          skip_spaces (context);
1070
1071
0
          if (context->iter != context->current_text_end)
1072
0
            {
1073
              /* The attribute name is complete (it is held in the
1074
               * partial chunk). Validate it, record it as a new attribute
1075
               * and expect the '=' that must follow.
               */
1076
0
              if (!name_validate (context, context->partial_chunk->buffer))
1077
0
                break;
1078
1079
0
              add_attribute (context, context->partial_chunk);
1080
1081
0
              markup_string_free (context->partial_chunk, true);
1082
0
              context->partial_chunk = NULL;
1083
0
              context->start = NULL;
1084
1085
0
              if (*context->iter == '=')
1086
0
                {
1087
0
                  advance_char (context);
1088
0
                  context->state = STATE_AFTER_ATTRIBUTE_EQUALS_SIGN;
1089
0
                }
1090
0
              else
1091
0
                {
1092
0
                  char *error_text = xasprintf (_("missing '%c'"), '=');
1093
0
                  emit_error (context, error_text);
1094
0
                  free (error_text);
1095
0
                }
1096
0
            }
1097
0
          break;
1098
1099
0
        case STATE_BETWEEN_ATTRIBUTES:
1100
          /* Possible next states: AFTER_CLOSE_ANGLE,
1101
           * AFTER_ELISION_SLASH, INSIDE_ATTRIBUTE_NAME
1102
           */
1103
0
          skip_spaces (context);
1104
1105
0
          if (context->iter != context->current_text_end)
1106
0
            {
1107
0
              if (*context->iter == '/')
1108
0
                {
1109
0
                  advance_char (context);
1110
0
                  context->state = STATE_AFTER_ELISION_SLASH;
1111
0
                }
1112
0
              else if (*context->iter == '>')
1113
0
                {
1114
0
                  advance_char (context);
1115
0
                  context->state = STATE_AFTER_CLOSE_ANGLE;
1116
0
                }
1117
0
              else if (!IS_COMMON_NAME_END_CHAR (*(context->iter)))
1118
0
                {
1119
0
                  context->state = STATE_INSIDE_ATTRIBUTE_NAME;
1120
                  /* start of attribute name */
1121
0
                  context->start = context->iter;
1122
0
                }
1123
0
              else
1124
0
                {
1125
0
                  char *error_text = xasprintf (_("missing '%c' or '%c'"),
1126
0
                                                '>', '/');
1127
0
                  emit_error (context, error_text);
1128
0
                  free (error_text);
1129
0
                }
1130
1131
              /* If we're done with attributes, invoke
1132
               * the start_element callback
1133
               */
1134
0
              if (context->state == STATE_AFTER_ELISION_SLASH
1135
0
                  || context->state == STATE_AFTER_CLOSE_ANGLE)
1136
0
                emit_start_element (context);
1137
0
            }
1138
0
          break;
1139
1140
0
        case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN:
1141
          /* Possible next state: INSIDE_ATTRIBUTE_VALUE_[SQ/DQ] */
1142
1143
0
          skip_spaces (context);
1144
1145
0
          if (context->iter != context->current_text_end)
1146
0
            {
1147
0
              if (*context->iter == '"')
1148
0
                {
1149
0
                  advance_char (context);
1150
0
                  context->state = STATE_INSIDE_ATTRIBUTE_VALUE_DQ;
1151
0
                  context->start = context->iter;
1152
0
                }
1153
0
              else if (*context->iter == '\'')
1154
0
                {
1155
0
                  advance_char (context);
1156
0
                  context->state = STATE_INSIDE_ATTRIBUTE_VALUE_SQ;
1157
0
                  context->start = context->iter;
1158
0
                }
1159
0
              else
1160
0
                {
1161
0
                  char *error_text = xasprintf (_("missing '%c' or '%c'"),
1162
0
                                                '\'', '"');
1163
0
                  emit_error (context, error_text);
1164
0
                  free (error_text);
1165
0
                }
1166
0
            }
1167
0
          break;
1168
1169
0
        case STATE_INSIDE_ATTRIBUTE_VALUE_SQ:
1170
0
        case STATE_INSIDE_ATTRIBUTE_VALUE_DQ:
1171
          /* Possible next states: BETWEEN_ATTRIBUTES */
1172
0
          {
1173
0
            char delim;
1174
1175
0
            if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ)
1176
0
              {
1177
0
                delim = '\'';
1178
0
              }
1179
0
            else
1180
0
              {
1181
0
                delim = '"';
1182
0
              }
1183
            /* Scan forward to the closing quote or to the end of the chunk,
             * whichever comes first.
             */
1184
0
            do
1185
0
              {
1186
0
                if (*context->iter == delim)
1187
0
                  break;
1188
0
              }
1189
0
            while (advance_char (context));
1190
0
          }
1191
0
          if (context->iter == context->current_text_end)
1192
0
            {
1193
              /* The value hasn't necessarily ended. Merge with
1194
               * partial chunk, leave state unchanged.
1195
               */
1196
0
              add_to_partial (context, context->start, context->iter);
1197
0
            }
1198
0
          else
1199
0
            {
1200
              /* The value has ended at the quote mark. Combine it
1201
               * with the partial chunk if any; set it for the current
1202
               * attribute.
1203
               */
1204
0
              add_to_partial (context, context->start, context->iter);
1205
1206
0
              assert (context->cur_attr >= 0);
1207
1208
0
              bool is_ascii;
1209
0
              if (unescape_string_inplace (context, context->partial_chunk,
1210
0
                                           &is_ascii)
1211
0
                  && (is_ascii
1212
0
                      || text_validate (context,
1213
0
                                        context->partial_chunk->buffer,
1214
0
                                        context->partial_chunk->buflen)))
1215
0
                {
1216
                  /* success, advance past quote and set state. */
1217
0
                  context->attr_values[context->cur_attr] =
1218
0
                    markup_string_free (context->partial_chunk, false);
1219
0
                  context->partial_chunk = NULL;
1220
0
                  advance_char (context);
1221
0
                  context->state = STATE_BETWEEN_ATTRIBUTES;
1222
0
                  context->start = NULL;
1223
0
                }
1224
1225
0
              truncate_partial (context);
1226
0
            }
1227
0
          break;
1228
1229
0
        case STATE_INSIDE_TEXT:
1230
          /* Possible next states: AFTER_OPEN_ANGLE */
1231
0
          do
1232
0
            {
1233
0
              if (*context->iter == '<')
1234
0
                break;
1235
0
            }
1236
0
          while (advance_char (context));
1237
1238
          /* The text hasn't necessarily ended. Merge with
1239
           * partial chunk, leave state unchanged.
1240
           */
1241
1242
0
          add_to_partial (context, context->start, context->iter);
1243
1244
0
          if (context->iter != context->current_text_end)
1245
0
            {
1246
              /* The text has ended at the open angle. Call the text
1247
               * callback.
1248
               */
1249
0
              bool is_ascii;
1250
0
              if (unescape_string_inplace (context, context->partial_chunk,
1251
0
                                           &is_ascii)
1252
0
                  && (is_ascii
1253
0
                      || text_validate (context,
1254
0
                                        context->partial_chunk->buffer,
1255
0
                                        context->partial_chunk->buflen)))
1256
0
                {
1257
0
                  if (context->parser->text)
1258
0
                    (*context->parser->text) (context,
1259
0
                                              context->partial_chunk->buffer,
1260
0
                                              context->partial_chunk->buflen,
1261
0
                                              context->user_data);
1262
1263
                  /* advance past open angle and set state. */
1264
0
                  advance_char (context);
1265
0
                  context->state = STATE_AFTER_OPEN_ANGLE;
1266
                  /* could begin a passthrough */
1267
0
                  context->start = context->iter;
1268
0
                }
1269
1270
0
              truncate_partial (context);
1271
0
            }
1272
0
          break;
1273
1274
0
        case STATE_AFTER_CLOSE_TAG_SLASH:
1275
          /* Possible next state: INSIDE_CLOSE_TAG_NAME */
1276
0
          if (!IS_COMMON_NAME_END_CHAR (*(context->iter)))
1277
0
            {
1278
0
              context->state = STATE_INSIDE_CLOSE_TAG_NAME;
1279
1280
              /* start of tag name */
1281
0
              context->start = context->iter;
1282
0
            }
1283
0
          else
1284
0
            {
1285
0
              char *error_text = xasprintf (_("invalid character after '%s'"),
1286
0
                                            "</");
1287
0
              emit_error (context, error_text);
1288
0
              free (error_text);
1289
0
            }
1290
0
          break;
1291
1292
0
        case STATE_INSIDE_CLOSE_TAG_NAME:
1293
          /* Possible next state: AFTER_CLOSE_TAG_NAME */
1294
0
          advance_to_name_end (context);
1295
0
          add_to_partial (context, context->start, context->iter);
1296
1297
0
          if (context->iter != context->current_text_end)
1298
0
            context->state = STATE_AFTER_CLOSE_TAG_NAME;
1299
0
          break;
1300
1301
0
        case STATE_AFTER_CLOSE_TAG_NAME:
1302
          /* Possible next state: AFTER_CLOSE_ANGLE */
1303
1304
0
          skip_spaces (context);
1305
1306
0
          if (context->iter != context->current_text_end)
1307
0
            {
              /* The partial chunk holds the name from the close tag.  It is
               * detached from the context here and re-attached below, after
               * the checks and the end_element handling.
               * NOTE(review): presumably so that nothing invoked in between
               * sees or modifies it -- confirm.
               */
1308
0
              markup_string_ty *close_name = context->partial_chunk;
1309
0
              context->partial_chunk = NULL;
1310
1311
0
              if (*context->iter != '>')
1312
0
                {
1313
0
                  char *error_text =
1314
0
                    xasprintf (_("invalid character after '%s'"),
1315
0
                               _("a close element name"));
1316
0
                  emit_error (context, error_text);
1317
0
                  free (error_text);
1318
0
                }
1319
0
              else if (gl_list_size (context->tag_stack) == 0)
1320
0
                {
                  /* A close tag while no element is open.  */
1321
0
                  emit_error (context, _("element is closed"));
1322
0
                }
1323
0
              else if (strcmp (close_name->buffer, current_element (context))
1324
0
                       != 0)
1325
0
                {
                  /* The close tag does not name the innermost open element.  */
1326
0
                  emit_error (context, _("element is closed"));
1327
0
                }
1328
0
              else
1329
0
                {
1330
0
                  advance_char (context);
1331
0
                  context->state = STATE_AFTER_CLOSE_ANGLE;
1332
0
                  context->start = NULL;
1333
1334
0
                  emit_end_element (context);
1335
0
                }
1336
0
              context->partial_chunk = close_name;
1337
0
              truncate_partial (context);
1338
0
            }
1339
0
          break;
1340
1341
0
        case STATE_INSIDE_PASSTHROUGH:
1342
          /* Possible next state: AFTER_CLOSE_ANGLE */
1343
0
          do
1344
0
            {
1345
0
              if (*context->iter == '<')
1346
0
                context->balance++;
1347
0
              if (*context->iter == '>')
1348
0
                {
1349
0
                  context->balance--;
1350
0
                  add_to_partial (context, context->start, context->iter);
1351
0
                  context->start = context->iter;
1352
                  /* The passthrough ends at this '>' only if what has been
                   * collected so far is a complete construct: "<?...?",
                   * "<!--...--", "<![CDATA[...]]", or a "<!DOCTYPE" whose
                   * angle brackets are balanced.  Otherwise the '>' is part
                   * of the passthrough and scanning continues.
                   */
1353
0
                  char *str = context->partial_chunk->buffer;
1354
0
                  size_t len = context->partial_chunk->buflen;
1355
1356
0
                  if (str[1] == '?' && str[len - 1] == '?')
1357
0
                    break;
1358
0
                  if (strncmp (str, "<!--", 4) == 0
1359
0
                      && strcmp (str + len - 2, "--") == 0)
1360
0
                    break;
1361
0
                  if (strncmp (str, "<![CDATA[", 9) == 0
1362
0
                      && strcmp (str + len - 2, "]]") == 0)
1363
0
                    break;
1364
0
                  if (strncmp (str, "<!DOCTYPE", 9) == 0
1365
0
                      && context->balance == 0)
1366
0
                    break;
1367
0
                }
1368
0
            }
1369
0
          while (advance_char (context));
1370
1371
0
          if (context->iter == context->current_text_end)
1372
0
            {
1373
              /* The passthrough hasn't necessarily ended. Merge with
1374
               * partial chunk, leave state unchanged.
1375
               */
1376
0
               add_to_partial (context, context->start, context->iter);
1377
0
            }
1378
0
          else
1379
0
            {
1380
              /* The passthrough has ended at the close angle. Combine
1381
               * it with the partial chunk if any. Call the passthrough
1382
               * callback. Note that the open/close angles are
1383
               * included in the text of the passthrough.
1384
               */
1385
0
              advance_char (context); /* advance past close angle */
1386
0
              add_to_partial (context, context->start, context->iter);
1387
              /* With MARKUP_TREAT_CDATA_AS_TEXT, a CDATA section goes to
               * the text callback instead: "+ 9" skips the "<![CDATA["
               * prefix and "- 12" additionally drops the "]]>" suffix, so
               * only the payload is passed on.
               */
1388
0
              if (context->flags & MARKUP_TREAT_CDATA_AS_TEXT
1389
0
                  && strncmp (context->partial_chunk->buffer, "<![CDATA[", 9) == 0)
1390
0
                {
1391
0
                  if (context->parser->text
1392
0
                      && text_validate (context,
1393
0
                                        context->partial_chunk->buffer + 9,
1394
0
                                        context->partial_chunk->buflen - 12))
1395
0
                    (*context->parser->text) (context,
1396
0
                                              context->partial_chunk->buffer + 9,
1397
0
                                              context->partial_chunk->buflen - 12,
1398
0
                                              context->user_data);
1399
0
                }
1400
0
              else if (context->parser->passthrough
1401
0
                       && text_validate (context,
1402
0
                                         context->partial_chunk->buffer,
1403
0
                                         context->partial_chunk->buflen))
1404
0
                (*context->parser->passthrough) (context,
1405
0
                                                 context->partial_chunk->buffer,
1406
0
                                                 context->partial_chunk->buflen,
1407
0
                                                 context->user_data);
1408
1409
0
              truncate_partial (context);
1410
1411
0
              context->state = STATE_AFTER_CLOSE_ANGLE;
1412
0
              context->start = context->iter; /* could begin text */
1413
0
            }
1414
0
          break;
1415
1416
0
        case STATE_ERROR:
          /* Stop consuming input once the context is in the error state.  */
1417
0
          goto finished;
1418
0
          break;
1419
1420
0
        default:
1421
0
          abort ();
1422
0
          break;
1423
0
        }
1424
0
    }
1425
  /* Reached when the chunk is exhausted or the error state was entered.  */
1426
0
 finished:
1427
0
  context->parsing = false;
1428
1429
0
  return context->state != STATE_ERROR;
1430
0
}
1431
1432
/* Signals to the parse context that all data has been fed into the
1433
 * parse context with markup_parse_context_parse.
1434
 *
1435
 * This function reports an error if the document isn't complete,
1436
 * for example if elements are still open.  */
1437
bool
1438
markup_parse_context_end_parse (markup_parse_context_ty *context)
1439
0
{
1440
0
  assert (context != NULL);
1441
0
  assert (!context->parsing);
1442
0
  assert (context->state != STATE_ERROR);
1443
1444
0
  if (context->partial_chunk != NULL)
1445
0
    {
1446
0
      markup_string_free (context->partial_chunk, true);
1447
0
      context->partial_chunk = NULL;
1448
0
    }
1449
1450
0
  if (context->document_empty)
1451
0
    {
1452
0
      emit_error (context, _("empty document"));
1453
0
      return false;
1454
0
    }
1455
1456
0
  context->parsing = true;
1457
1458
0
  const char *location = NULL;
1459
0
  switch (context->state)
1460
0
    {
1461
0
    case STATE_START:
1462
      /* Nothing to do */
1463
0
      break;
1464
1465
0
    case STATE_AFTER_OPEN_ANGLE:
1466
0
      location = _("after '<'");
1467
0
      break;
1468
1469
0
    case STATE_AFTER_CLOSE_ANGLE:
1470
0
      if (gl_list_size (context->tag_stack) > 0)
1471
0
        {
1472
          /* Error message the same as for INSIDE_TEXT */
1473
0
          location = _("elements still open");
1474
0
        }
1475
0
      break;
1476
1477
0
    case STATE_AFTER_ELISION_SLASH:
1478
0
      location = _("missing '>'");
1479
0
      break;
1480
1481
0
    case STATE_INSIDE_OPEN_TAG_NAME:
1482
0
      location = _("inside an element name");
1483
0
      break;
1484
1485
0
    case STATE_INSIDE_ATTRIBUTE_NAME:
1486
0
    case STATE_AFTER_ATTRIBUTE_NAME:
1487
0
      location = _("inside an attribute name");
1488
0
      break;
1489
1490
0
    case STATE_BETWEEN_ATTRIBUTES:
1491
0
      location = _("inside an open tag");
1492
0
      break;
1493
1494
0
    case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN:
1495
0
      location = _("after '='");
1496
0
      break;
1497
1498
0
    case STATE_INSIDE_ATTRIBUTE_VALUE_SQ:
1499
0
    case STATE_INSIDE_ATTRIBUTE_VALUE_DQ:
1500
0
      location = _("inside an attribute value");
1501
0
      break;
1502
1503
0
    case STATE_INSIDE_TEXT:
1504
0
      assert (gl_list_size (context->tag_stack) > 0);
1505
0
      location = _("elements still open");
1506
0
      break;
1507
1508
0
    case STATE_AFTER_CLOSE_TAG_SLASH:
1509
0
    case STATE_INSIDE_CLOSE_TAG_NAME:
1510
0
    case STATE_AFTER_CLOSE_TAG_NAME:
1511
0
      location = _("inside the close tag");
1512
0
      break;
1513
1514
0
    case STATE_INSIDE_PASSTHROUGH:
1515
0
      location = _("inside a comment or processing instruction");
1516
0
      break;
1517
1518
0
    case STATE_ERROR:
1519
0
    default:
1520
0
      abort ();
1521
0
      break;
1522
0
    }
1523
1524
0
  if (location != NULL)
1525
0
    {
1526
0
      char *error_text = xasprintf (_("document ended unexpectedly: %s"),
1527
0
                                    location);
1528
0
      emit_error (context, error_text);
1529
0
      free (error_text);
1530
0
    }
1531
1532
0
  context->parsing = false;
1533
1534
0
  return context->state != STATE_ERROR;
1535
0
}
1536
1537
const char *
1538
markup_parse_context_get_error (markup_parse_context_ty *context)
1539
0
{
1540
0
  return context->error_text;
1541
0
}