Coverage Report

Created: 2026-04-01 06:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/glib/glib/gmarkup.c
Line
Count
Source
1
/* gmarkup.c - Simple XML-like parser
2
 *
3
 *  Copyright 2000, 2003 Red Hat, Inc.
4
 *  Copyright 2007, 2008 Ryan Lortie <desrt@desrt.ca>
5
 *
6
 * SPDX-License-Identifier: LGPL-2.1-or-later
7
 *
8
 * This library is free software; you can redistribute it and/or
9
 * modify it under the terms of the GNU Lesser General Public
10
 * License as published by the Free Software Foundation; either
11
 * version 2.1 of the License, or (at your option) any later version.
12
 *
13
 * This library is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16
 * Lesser General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU Lesser General Public License
19
 * along with this library; if not, see <http://www.gnu.org/licenses/>.
20
 */
21
22
#include "config.h"
23
24
#include <stdarg.h>
25
#include <string.h>
26
#include <stdio.h>
27
#include <stdlib.h>
28
#include <errno.h>
29
30
#include "gmarkup.h"
31
32
#include "gatomic.h"
33
#include "gslice.h"
34
#include "galloca.h"
35
#include "gstrfuncs.h"
36
#include "gstring.h"
37
#include "gtestutils.h"
38
#include "glibintl.h"
39
#include "gthread.h"
40
41
G_DEFINE_QUARK (g-markup-error-quark, g_markup_error)
42
43
/* States of the parser's state machine; the current one is kept in
 * GMarkupParseContext.state.  A new context starts in STATE_INITIAL and
 * mark_error() moves it to STATE_ERROR.
 */
typedef enum
{
  STATE_INITIAL = 0,
  STATE_AFTER_BOM1,
  STATE_AFTER_BOM2,
  STATE_START,
  STATE_AFTER_OPEN_ANGLE,
  STATE_AFTER_CLOSE_ANGLE,
  /* the slash that obviates need for end element */
  STATE_AFTER_ELISION_SLASH,
  STATE_INSIDE_OPEN_TAG_NAME,
  STATE_INSIDE_ATTRIBUTE_NAME,
  STATE_AFTER_ATTRIBUTE_NAME,
  STATE_BETWEEN_ATTRIBUTES,
  STATE_AFTER_ATTRIBUTE_EQUALS_SIGN,
  STATE_INSIDE_ATTRIBUTE_VALUE_SQ,
  STATE_INSIDE_ATTRIBUTE_VALUE_DQ,
  STATE_INSIDE_TEXT,
  STATE_AFTER_CLOSE_TAG_SLASH,
  STATE_INSIDE_CLOSE_TAG_NAME,
  STATE_AFTER_CLOSE_TAG_NAME,
  STATE_INSIDE_PASSTHROUGH,
  STATE_ERROR
} GMarkupParseState;
66
67
typedef struct
68
{
69
  const char *prev_element;
70
  const GMarkupParser *prev_parser;
71
  gpointer prev_user_data;
72
} GMarkupRecursionTracker;
73
74
struct _GMarkupParseContext
75
{
76
  const GMarkupParser *parser;
77
78
  gint ref_count;  /* (atomic) */
79
80
  GMarkupParseFlags flags;
81
82
  gint line_number;
83
  gint char_number;
84
  gsize offset;
85
86
  gint tag_line;
87
  gint tag_char;
88
  gsize tag_offset;
89
90
  GMarkupParseState state;
91
92
  gpointer user_data;
93
  GDestroyNotify dnotify;
94
95
  /* A piece of character data or an element that
96
   * hasn't "ended" yet so we haven't yet called
97
   * the callback for it.
98
   */
99
  GString *partial_chunk;
100
  GSList *spare_chunks;
101
102
  GSList *tag_stack;
103
  GSList *tag_stack_gstr;
104
  GSList *spare_list_nodes;
105
106
  GString **attr_names;
107
  GString **attr_values;
108
  gint cur_attr;
109
  gint alloc_attrs;
110
111
  const gchar *current_text;
112
  gssize       current_text_len;
113
  const gchar *current_text_end;
114
115
  /* used to save the start of the last interesting thingy */
116
  const gchar *start;
117
118
  const gchar *iter;
119
120
  guint document_empty : 1;
121
  guint parsing : 1;
122
  guint awaiting_pop : 1;
123
  gint balance;
124
125
  /* subparser support */
126
  GSList *subparser_stack; /* (GMarkupRecursionTracker *) */
127
  const char *subparser_element;
128
  gpointer held_user_data;
129
};
130
131
/*
132
 * Helpers to reduce our allocation overhead, we have
133
 * a well defined allocation lifecycle.
134
 */
135
static GSList *
136
get_list_node (GMarkupParseContext *context, gpointer data)
137
0
{
138
0
  GSList *node;
139
0
  if (context->spare_list_nodes != NULL)
140
0
    {
141
0
      node = context->spare_list_nodes;
142
0
      context->spare_list_nodes = g_slist_remove_link (context->spare_list_nodes, node);
143
0
    }
144
0
  else
145
0
    node = g_slist_alloc();
146
0
  node->data = data;
147
0
  return node;
148
0
}
149
150
static void
151
free_list_node (GMarkupParseContext *context, GSList *node)
152
0
{
153
0
  node->data = NULL;
154
0
  context->spare_list_nodes = g_slist_concat (node, context->spare_list_nodes);
155
0
}
156
157
/**
158
 * g_markup_parse_context_new:
159
 * @parser: a #GMarkupParser
160
 * @flags: one or more #GMarkupParseFlags
161
 * @user_data: user data to pass to #GMarkupParser functions
162
 * @user_data_dnotify: user data destroy notifier called when
163
 *     the parse context is freed
164
 *
165
 * Creates a new parse context. A parse context is used to parse
166
 * marked-up documents. You can feed any number of documents into
167
 * a context, as long as no errors occur; once an error occurs,
168
 * the parse context can't continue to parse text (you have to
169
 * free it and create a new parse context).
170
 *
171
 * Returns: a new #GMarkupParseContext
172
 **/
173
GMarkupParseContext *
174
g_markup_parse_context_new (const GMarkupParser *parser,
175
                            GMarkupParseFlags    flags,
176
                            gpointer             user_data,
177
                            GDestroyNotify       user_data_dnotify)
178
0
{
179
0
  GMarkupParseContext *context;
180
181
0
  g_return_val_if_fail (parser != NULL, NULL);
182
183
0
  context = g_new (GMarkupParseContext, 1);
184
185
0
  context->ref_count = 1;
186
0
  context->parser = parser;
187
0
  context->flags = flags;
188
0
  context->user_data = user_data;
189
0
  context->dnotify = user_data_dnotify;
190
191
0
  context->line_number = 1;
192
0
  context->char_number = 1;
193
0
  context->offset = 0;
194
195
0
  context->tag_line = 1;
196
0
  context->tag_char = 1;
197
0
  context->tag_offset = 0;
198
199
0
  context->partial_chunk = NULL;
200
0
  context->spare_chunks = NULL;
201
0
  context->spare_list_nodes = NULL;
202
203
0
  context->state = STATE_INITIAL;
204
0
  context->tag_stack = NULL;
205
0
  context->tag_stack_gstr = NULL;
206
0
  context->attr_names = NULL;
207
0
  context->attr_values = NULL;
208
0
  context->cur_attr = -1;
209
0
  context->alloc_attrs = 0;
210
211
0
  context->current_text = NULL;
212
0
  context->current_text_len = -1;
213
0
  context->current_text_end = NULL;
214
215
0
  context->start = NULL;
216
0
  context->iter = NULL;
217
218
0
  context->document_empty = TRUE;
219
0
  context->parsing = FALSE;
220
221
0
  context->awaiting_pop = FALSE;
222
0
  context->subparser_stack = NULL;
223
0
  context->subparser_element = NULL;
224
225
  /* this is only looked at if awaiting_pop = TRUE.  initialise anyway. */
226
0
  context->held_user_data = NULL;
227
228
0
  context->balance = 0;
229
230
0
  return context;
231
0
}
232
233
/**
234
 * g_markup_parse_context_ref:
235
 * @context: a #GMarkupParseContext
236
 *
237
 * Increases the reference count of @context.
238
 *
239
 * Returns: the same @context
240
 *
241
 * Since: 2.36
242
 **/
243
GMarkupParseContext *
244
g_markup_parse_context_ref (GMarkupParseContext *context)
245
0
{
246
0
  g_return_val_if_fail (context != NULL, NULL);
247
0
  g_return_val_if_fail (context->ref_count > 0, NULL);
248
249
0
  g_atomic_int_inc (&context->ref_count);
250
251
0
  return context;
252
0
}
253
254
/**
255
 * g_markup_parse_context_unref:
256
 * @context: a #GMarkupParseContext
257
 *
258
 * Decreases the reference count of @context.  When its reference count
259
 * drops to 0, it is freed.
260
 *
261
 * Since: 2.36
262
 **/
263
void
264
g_markup_parse_context_unref (GMarkupParseContext *context)
265
0
{
266
0
  g_return_if_fail (context != NULL);
267
0
  g_return_if_fail (context->ref_count > 0);
268
269
0
  if (g_atomic_int_dec_and_test (&context->ref_count))
270
0
    g_markup_parse_context_free (context);
271
0
}
272
273
static void
274
string_full_free (gpointer ptr)
275
0
{
276
0
  g_string_free (ptr, TRUE);
277
0
}
278
279
static void clear_attributes (GMarkupParseContext *context);
280
281
/**
282
 * g_markup_parse_context_free:
283
 * @context: a #GMarkupParseContext
284
 *
285
 * Frees a #GMarkupParseContext.
286
 *
287
 * This function can't be called from inside one of the
288
 * #GMarkupParser functions or while a subparser is pushed.
289
 */
290
void
291
g_markup_parse_context_free (GMarkupParseContext *context)
292
0
{
293
0
  g_return_if_fail (context != NULL);
294
0
  g_return_if_fail (!context->parsing);
295
0
  g_return_if_fail (!context->subparser_stack);
296
0
  g_return_if_fail (!context->awaiting_pop);
297
298
0
  if (context->dnotify)
299
0
    (* context->dnotify) (context->user_data);
300
301
0
  clear_attributes (context);
302
0
  g_free (context->attr_names);
303
0
  g_free (context->attr_values);
304
305
0
  g_slist_free_full (context->tag_stack_gstr, string_full_free);
306
0
  g_slist_free (context->tag_stack);
307
308
0
  g_slist_free_full (context->spare_chunks, string_full_free);
309
0
  g_slist_free (context->spare_list_nodes);
310
311
0
  if (context->partial_chunk)
312
0
    g_string_free (context->partial_chunk, TRUE);
313
314
0
  g_free (context);
315
0
}
316
317
static void pop_subparser_stack (GMarkupParseContext *context);
318
319
static void
320
mark_error (GMarkupParseContext *context,
321
            GError              *error)
322
0
{
323
0
  context->state = STATE_ERROR;
324
325
0
  if (context->parser->error)
326
0
    (*context->parser->error) (context, error, context->user_data);
327
328
  /* report the error all the way up to free all the user-data */
329
0
  while (context->subparser_stack)
330
0
    {
331
0
      pop_subparser_stack (context);
332
0
      context->awaiting_pop = FALSE; /* already been freed */
333
334
0
      if (context->parser->error)
335
0
        (*context->parser->error) (context, error, context->user_data);
336
0
    }
337
0
}
338
339
static void
340
set_error (GMarkupParseContext  *context,
341
           GError              **error,
342
           GMarkupError          code,
343
           const gchar          *format,
344
           ...) G_GNUC_PRINTF (4, 5);
345
346
static void
347
set_error_literal (GMarkupParseContext  *context,
348
                   GError              **error,
349
                   GMarkupError          code,
350
                   const gchar          *message)
351
0
{
352
0
  GError *tmp_error;
353
354
0
  tmp_error = g_error_new_literal (G_MARKUP_ERROR, code, message);
355
356
0
  g_prefix_error (&tmp_error,
357
0
                  _("Error on line %d char %d: "),
358
0
                  context->line_number,
359
0
                  context->char_number);
360
361
0
  mark_error (context, tmp_error);
362
363
0
  g_propagate_error (error, tmp_error);
364
0
}
365
366
/* printf-style variant of set_error_literal(): formats the message from
 * @format and the variadic arguments, then reports it with @code.
 *
 * The formatted text may embed bytes taken from the document being parsed,
 * which can be invalid UTF-8; it is therefore sanitised before it becomes
 * the GError message.
 */
G_GNUC_PRINTF(4, 5)
static void
set_error (GMarkupParseContext  *context,
           GError              **error,
           GMarkupError          code,
           const gchar          *format,
           ...)
{
  gchar *s;
  gchar *s_valid;
  va_list args;

  va_start (args, format);
  s = g_strdup_vprintf (format, args);
  va_end (args);

  /* Make sure that the GError message is valid UTF-8
   * even if it is complaining about invalid UTF-8 in the markup
   */
  s_valid = g_utf8_make_valid (s, -1);

  /* Pass the sanitised copy on; the raw string was previously passed here,
   * which left s_valid unused and let invalid UTF-8 reach the message.
   */
  set_error_literal (context, error, code, s_valid);

  g_free (s);
  g_free (s_valid);
}
391
392
static void
393
propagate_error (GMarkupParseContext  *context,
394
                 GError              **dest,
395
                 GError               *src)
396
0
{
397
0
  if (context->flags & G_MARKUP_PREFIX_ERROR_POSITION)
398
0
    g_prefix_error (&src,
399
0
                    _("Error on line %d char %d: "),
400
0
                    context->line_number,
401
0
                    context->char_number);
402
403
0
  mark_error (context, src);
404
405
0
  g_propagate_error (dest, src);
406
0
}
407
408
/* True for the four bytes that commonly terminate a name: space, '=', '/'
 * and '>'.  Note that the argument is evaluated more than once.
 */
#define IS_COMMON_NAME_END_CHAR(c) \
  ((c) == ' ' || (c) == '=' || (c) == '/' || (c) == '>')
410
411
static gboolean
412
slow_name_validate (GMarkupParseContext  *context,
413
                    const gchar          *name,
414
                    GError              **error)
415
0
{
416
0
  const gchar *p = name;
417
418
0
  if (!g_utf8_validate (name, -1, NULL))
419
0
    {
420
0
      set_error (context, error, G_MARKUP_ERROR_BAD_UTF8,
421
0
                 _("Invalid UTF-8 encoded text in name — not valid “%s”"), name);
422
0
      return FALSE;
423
0
    }
424
425
0
  if (!(g_ascii_isalpha (*p) ||
426
0
        (!IS_COMMON_NAME_END_CHAR (*p) &&
427
0
         (*p == '_' ||
428
0
          *p == ':' ||
429
0
          g_unichar_isalpha (g_utf8_get_char (p))))))
430
0
    {
431
0
      set_error (context, error, G_MARKUP_ERROR_PARSE,
432
0
                 _("“%s” is not a valid name"), name);
433
0
      return FALSE;
434
0
    }
435
436
0
  for (p = g_utf8_next_char (name); *p != '\0'; p = g_utf8_next_char (p))
437
0
    {
438
      /* is_name_char */
439
0
      if (!(g_ascii_isalnum (*p) ||
440
0
            (!IS_COMMON_NAME_END_CHAR (*p) &&
441
0
             (*p == '.' ||
442
0
              *p == '-' ||
443
0
              *p == '_' ||
444
0
              *p == ':' ||
445
0
              g_unichar_isalpha (g_utf8_get_char (p))))))
446
0
        {
447
0
          set_error (context, error, G_MARKUP_ERROR_PARSE,
448
0
                     _("“%s” is not a valid name: “%c”"), name, *p);
449
0
          return FALSE;
450
0
        }
451
0
    }
452
0
  return TRUE;
453
0
}
454
455
/*
456
 * Use me for elements, attributes etc.
457
 */
458
static gboolean
459
name_validate (GMarkupParseContext  *context,
460
               const gchar          *name,
461
               GError              **error)
462
0
{
463
0
  char mask;
464
0
  const char *p;
465
466
  /* name start char */
467
0
  p = name;
468
0
  if (G_UNLIKELY (IS_COMMON_NAME_END_CHAR (*p) ||
469
0
                  !(g_ascii_isalpha (*p) || *p == '_' || *p == ':')))
470
0
    goto slow_validate;
471
472
0
  for (mask = *p++; *p != '\0'; p++)
473
0
    {
474
0
      mask |= *p;
475
476
      /* is_name_char */
477
0
      if (G_UNLIKELY (!(g_ascii_isalnum (*p) ||
478
0
                        (!IS_COMMON_NAME_END_CHAR (*p) &&
479
0
                         (*p == '.' ||
480
0
                          *p == '-' ||
481
0
                          *p == '_' ||
482
0
                          *p == ':')))))
483
0
        goto slow_validate;
484
0
    }
485
486
0
  if (mask & 0x80) /* un-common / non-ascii */
487
0
    goto slow_validate;
488
489
0
  return TRUE;
490
491
0
 slow_validate:
492
0
  return slow_name_validate (context, name, error);
493
0
}
494
495
static gboolean
496
text_validate (GMarkupParseContext  *context,
497
               const gchar          *p,
498
               gsize                 len,
499
               GError              **error)
500
0
{
501
0
  if (!g_utf8_validate_len (p, len, NULL))
502
0
    {
503
0
      set_error (context, error, G_MARKUP_ERROR_BAD_UTF8,
504
0
                 _("Invalid UTF-8 encoded text in name — not valid “%s”"), p);
505
0
      return FALSE;
506
0
    }
507
0
  else
508
0
    return TRUE;
509
0
}
510
511
static gchar*
512
char_str (gunichar c,
513
          gchar   *buf)
514
0
{
515
0
  memset (buf, 0, 8);
516
0
  g_unichar_to_utf8 (c, buf);
517
0
  return buf;
518
0
}
519
520
/* Format the next UTF-8 character as a gchar* for printing in error output
521
 * when we encounter a syntax error. This correctly handles invalid UTF-8,
522
 * emitting it as hex escapes. */
523
static gchar*
524
utf8_str (const gchar *utf8,
525
          gsize        max_len,
526
          gchar       *buf)
527
0
{
528
0
  gunichar c = g_utf8_get_char_validated (utf8, max_len);
529
0
  if (c == (gunichar) -1 || c == (gunichar) -2)
530
0
    {
531
0
      guchar ch = (max_len > 0) ? (guchar) *utf8 : 0;
532
0
      gchar *temp = g_strdup_printf ("\\x%02x", (guint) ch);
533
0
      memset (buf, 0, 8);
534
0
      memcpy (buf, temp, strlen (temp));
535
0
      g_free (temp);
536
0
    }
537
0
  else
538
0
    char_str (c, buf);
539
0
  return buf;
540
0
}
541
542
/* Reports an error found while unescaping.  The message is formatted from
 * @format and the variadic arguments and prefixed with a line number: the
 * context's current line minus the number of newlines still present in
 * @remaining_text.  The context is marked as failed and the error is
 * propagated to @error.
 */
G_GNUC_PRINTF(5, 6)
static void
set_unescape_error (GMarkupParseContext  *context,
                    GError              **error,
                    const gchar          *remaining_text,
                    GMarkupError          code,
                    const gchar          *format,
                    ...)
{
  GError *tmp_error;
  gchar *s;
  va_list args;
  gint remaining_newlines = 0;
  const gchar *p;

  /* Count the line breaks between the failure point and the end */
  for (p = remaining_text; *p != '\0'; p++)
    if (*p == '\n')
      remaining_newlines++;

  va_start (args, format);
  s = g_strdup_vprintf (format, args);
  va_end (args);

  tmp_error = g_error_new (G_MARKUP_ERROR,
                           code,
                           _("Error on line %d: %s"),
                           context->line_number - remaining_newlines,
                           s);
  g_free (s);

  mark_error (context, tmp_error);
  g_propagate_error (error, tmp_error);
}
582
583
/*
584
 * re-write the GString in-place, unescaping anything that escaped.
585
 * most XML does not contain entities, or escaping.
586
 */
587
static gboolean
588
unescape_gstring_inplace (GMarkupParseContext  *context,
589
                          GString              *string,
590
                          gboolean             *is_ascii,
591
                          GError              **error)
592
0
{
593
0
  char mask, *to;
594
0
  const char *from;
595
0
  gboolean normalize_attribute;
596
597
0
  *is_ascii = FALSE;
598
599
  /* are we unescaping an attribute or not ? */
600
0
  if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ ||
601
0
      context->state == STATE_INSIDE_ATTRIBUTE_VALUE_DQ)
602
0
    normalize_attribute = TRUE;
603
0
  else
604
0
    normalize_attribute = FALSE;
605
606
  /*
607
   * Meeks' theorem: unescaping can only shrink text.
608
   * for &lt; etc. this is obvious, for &#xffff; more
609
   * thought is required, but this is patently so.
610
   */
611
0
  mask = 0;
612
0
  for (from = to = string->str; *from != '\0'; from++, to++)
613
0
    {
614
0
      *to = *from;
615
616
0
      mask |= *to;
617
0
      if (normalize_attribute && (*to == '\t' || *to == '\n'))
618
0
        *to = ' ';
619
0
      if (*to == '\r')
620
0
        {
621
0
          *to = normalize_attribute ? ' ' : '\n';
622
0
          if (from[1] == '\n')
623
0
            from++;
624
0
        }
625
0
      if (*from == '&')
626
0
        {
627
0
          from++;
628
0
          if (*from == '#')
629
0
            {
630
0
              gint base = 10;
631
0
              gulong l;
632
0
              gchar *end = NULL;
633
634
0
              from++;
635
636
0
              if (*from == 'x')
637
0
                {
638
0
                  base = 16;
639
0
                  from++;
640
0
                }
641
642
0
              errno = 0;
643
0
              l = strtoul (from, &end, base);
644
645
0
              if (end == from || errno != 0)
646
0
                {
647
0
                  set_unescape_error (context, error,
648
0
                                      from, G_MARKUP_ERROR_PARSE,
649
0
                                      _("Failed to parse “%-.*s”, which "
650
0
                                        "should have been a digit "
651
0
                                        "inside a character reference "
652
0
                                        "(&#234; for example) — perhaps "
653
0
                                        "the digit is too large"),
654
0
                                      (int)(end - from), from);
655
0
                  return FALSE;
656
0
                }
657
0
              else if (*end != ';')
658
0
                {
659
0
                  set_unescape_error (context, error,
660
0
                                      from, G_MARKUP_ERROR_PARSE,
661
0
                                      _("Character reference did not end with a "
662
0
                                        "semicolon; "
663
0
                                        "most likely you used an ampersand "
664
0
                                        "character without intending to start "
665
0
                                        "an entity — escape ampersand as &amp;"));
666
0
                  return FALSE;
667
0
                }
668
0
              else
669
0
                {
670
                  /* characters XML 1.1 permits */
671
0
                  if ((0 < l && l <= 0xD7FF) ||
672
0
                      (0xE000 <= l && l <= 0xFFFD) ||
673
0
                      (0x10000 <= l && l <= 0x10FFFF))
674
0
                    {
675
0
                      gchar buf[8];
676
0
                      char_str (l, buf);
677
0
                      strcpy (to, buf);
678
0
                      to += strlen (buf) - 1;
679
0
                      from = end;
680
0
                      if (l >= 0x80) /* not ascii */
681
0
                        mask |= 0x80;
682
0
                    }
683
0
                  else
684
0
                    {
685
0
                      set_unescape_error (context, error,
686
0
                                          from, G_MARKUP_ERROR_PARSE,
687
0
                                          _("Character reference “%-.*s” does not "
688
0
                                            "encode a permitted character"),
689
0
                                          (int)(end - from), from);
690
0
                      return FALSE;
691
0
                    }
692
0
                }
693
0
            }
694
695
0
          else if (strncmp (from, "lt;", 3) == 0)
696
0
            {
697
0
              *to = '<';
698
0
              from += 2;
699
0
            }
700
0
          else if (strncmp (from, "gt;", 3) == 0)
701
0
            {
702
0
              *to = '>';
703
0
              from += 2;
704
0
            }
705
0
          else if (strncmp (from, "amp;", 4) == 0)
706
0
            {
707
0
              *to = '&';
708
0
              from += 3;
709
0
            }
710
0
          else if (strncmp (from, "quot;", 5) == 0)
711
0
            {
712
0
              *to = '"';
713
0
              from += 4;
714
0
            }
715
0
          else if (strncmp (from, "apos;", 5) == 0)
716
0
            {
717
0
              *to = '\'';
718
0
              from += 4;
719
0
            }
720
0
          else
721
0
            {
722
0
              if (*from == ';')
723
0
                set_unescape_error (context, error,
724
0
                                    from, G_MARKUP_ERROR_PARSE,
725
0
                                    _("Empty entity “&;” seen; valid "
726
0
                                      "entities are: &amp; &quot; &lt; &gt; &apos;"));
727
0
              else
728
0
                {
729
0
                  const char *end = strchr (from, ';');
730
0
                  if (end)
731
0
                    set_unescape_error (context, error,
732
0
                                        from, G_MARKUP_ERROR_PARSE,
733
0
                                        _("Entity name “%-.*s” is not known"),
734
0
                                        (int)(end - from), from);
735
0
                  else
736
0
                    set_unescape_error (context, error,
737
0
                                        from, G_MARKUP_ERROR_PARSE,
738
0
                                        _("Entity did not end with a semicolon; "
739
0
                                          "most likely you used an ampersand "
740
0
                                          "character without intending to start "
741
0
                                          "an entity — escape ampersand as &amp;"));
742
0
                }
743
0
              return FALSE;
744
0
            }
745
0
        }
746
0
    }
747
748
0
  g_assert (to - string->str <= (gssize) string->len);
749
0
  if (to - string->str != (gssize) string->len)
750
0
    g_string_truncate (string, to - string->str);
751
752
0
  *is_ascii = !(mask & 0x80);
753
754
0
  return TRUE;
755
0
}
756
757
static inline gboolean
758
advance_char (GMarkupParseContext *context)
759
0
{
760
0
  context->iter++;
761
0
  context->char_number++;
762
0
  context->offset++;
763
764
0
  if (G_UNLIKELY (context->iter == context->current_text_end))
765
0
      return FALSE;
766
767
0
  else if (G_UNLIKELY (*context->iter == '\n'))
768
0
    {
769
0
      context->line_number++;
770
0
      context->char_number = 1;
771
0
    }
772
773
0
  return TRUE;
774
0
}
775
776
static inline gboolean
777
xml_isspace (char c)
778
0
{
779
0
  return c == ' ' || c == '\t' || c == '\n' || c == '\r';
780
0
}
781
782
static void
783
skip_spaces (GMarkupParseContext *context)
784
0
{
785
0
  do
786
0
    {
787
0
      if (!xml_isspace (*context->iter))
788
0
        return;
789
0
    }
790
0
  while (advance_char (context));
791
0
}
792
793
static void
794
advance_to_name_end (GMarkupParseContext *context)
795
0
{
796
0
  do
797
0
    {
798
0
      if (IS_COMMON_NAME_END_CHAR (*(context->iter)))
799
0
        return;
800
0
      if (xml_isspace (*(context->iter)))
801
0
        return;
802
0
    }
803
0
  while (advance_char (context));
804
0
}
805
806
static void
807
release_chunk (GMarkupParseContext *context, GString *str)
808
0
{
809
0
  GSList *node;
810
0
  if (!str)
811
0
    return;
812
0
  if (str->allocated_len > 256)
813
0
    { /* large strings are unusual and worth freeing */
814
0
      g_string_free (str, TRUE);
815
0
      return;
816
0
    }
817
0
  g_string_truncate (str, 0);
818
0
  node = get_list_node (context, str);
819
0
  context->spare_chunks = g_slist_concat (node, context->spare_chunks);
820
0
}
821
822
static void
823
add_to_partial (GMarkupParseContext *context,
824
                const gchar         *text_start,
825
                const gchar         *text_end)
826
0
{
827
0
  if (context->partial_chunk == NULL)
828
0
    { /* allocate a new chunk to parse into */
829
830
0
      if (context->spare_chunks != NULL)
831
0
        {
832
0
          GSList *node = context->spare_chunks;
833
0
          context->spare_chunks = g_slist_remove_link (context->spare_chunks, node);
834
0
          context->partial_chunk = node->data;
835
0
          free_list_node (context, node);
836
0
        }
837
0
      else
838
0
        context->partial_chunk = g_string_sized_new (MAX (28, text_end - text_start));
839
0
    }
840
841
0
  if (text_start != text_end)
842
0
    g_string_append_len (context->partial_chunk,
843
0
                         text_start, text_end - text_start);
844
0
}
845
846
static inline void
847
truncate_partial (GMarkupParseContext *context)
848
0
{
849
0
  if (context->partial_chunk != NULL)
850
0
    g_string_truncate (context->partial_chunk, 0);
851
0
}
852
853
static inline const gchar*
854
current_element (GMarkupParseContext *context)
855
0
{
856
0
  return context->tag_stack->data;
857
0
}
858
859
static void
860
pop_subparser_stack (GMarkupParseContext *context)
861
0
{
862
0
  GMarkupRecursionTracker *tracker;
863
864
0
  g_assert (context->subparser_stack);
865
866
0
  tracker = context->subparser_stack->data;
867
868
0
  context->awaiting_pop = TRUE;
869
0
  context->held_user_data = context->user_data;
870
871
0
  context->user_data = tracker->prev_user_data;
872
0
  context->parser = tracker->prev_parser;
873
0
  context->subparser_element = tracker->prev_element;
874
0
  g_slice_free (GMarkupRecursionTracker, tracker);
875
876
0
  context->subparser_stack = g_slist_delete_link (context->subparser_stack,
877
0
                                                  context->subparser_stack);
878
0
}
879
880
static void
881
push_partial_as_tag (GMarkupParseContext *context)
882
0
{
883
0
  GString *str = context->partial_chunk;
884
885
  /* sadly, this is exported by gmarkup_get_element_stack as-is */
886
0
  context->tag_stack = g_slist_concat (get_list_node (context, str->str), context->tag_stack);
887
0
  context->tag_stack_gstr = g_slist_concat (get_list_node (context, str), context->tag_stack_gstr);
888
889
0
  context->partial_chunk = NULL;
890
0
}
891
892
static void
893
pop_tag (GMarkupParseContext *context)
894
0
{
895
0
  GSList *nodea, *nodeb;
896
897
0
  nodea = context->tag_stack;
898
0
  nodeb = context->tag_stack_gstr;
899
0
  release_chunk (context, nodeb->data);
900
0
  context->tag_stack = g_slist_remove_link (context->tag_stack, nodea);
901
0
  context->tag_stack_gstr = g_slist_remove_link (context->tag_stack_gstr, nodeb);
902
0
  free_list_node (context, nodea);
903
0
  free_list_node (context, nodeb);
904
0
}
905
906
static void
907
possibly_finish_subparser (GMarkupParseContext *context)
908
0
{
909
0
  if (current_element (context) == context->subparser_element)
910
0
    pop_subparser_stack (context);
911
0
}
912
913
static void
914
ensure_no_outstanding_subparser (GMarkupParseContext *context)
915
0
{
916
0
  if (context->awaiting_pop)
917
0
    g_critical ("During the first end_element call after invoking a "
918
0
                "subparser you must pop the subparser stack and handle "
919
0
                "the freeing of the subparser user_data.  This can be "
920
0
                "done by calling the end function of the subparser.  "
921
0
                "Very probably, your program just leaked memory.");
922
923
  /* let valgrind watch the pointer disappear... */
924
0
  context->held_user_data = NULL;
925
0
  context->awaiting_pop = FALSE;
926
0
}
927
928
static const gchar*
929
current_attribute (GMarkupParseContext *context)
930
0
{
931
0
  g_assert (context->cur_attr >= 0);
932
0
  return context->attr_names[context->cur_attr]->str;
933
0
}
934
935
static gboolean
936
add_attribute (GMarkupParseContext *context, GString *str)
937
0
{
938
  /* Sanity check on the number of attributes. */
939
0
  if (context->cur_attr >= 1000)
940
0
    return FALSE;
941
942
0
  if (context->cur_attr + 2 >= context->alloc_attrs)
943
0
    {
944
0
      context->alloc_attrs += 5; /* silly magic number */
945
0
      context->attr_names = g_realloc (context->attr_names, sizeof(GString*)*context->alloc_attrs);
946
0
      context->attr_values = g_realloc (context->attr_values, sizeof(GString*)*context->alloc_attrs);
947
0
    }
948
0
  context->cur_attr++;
949
0
  context->attr_names[context->cur_attr] = str;
950
0
  context->attr_values[context->cur_attr] = NULL;
951
0
  context->attr_names[context->cur_attr+1] = NULL;
952
0
  context->attr_values[context->cur_attr+1] = NULL;
953
954
0
  return TRUE;
955
0
}
956
957
static void
958
clear_attributes (GMarkupParseContext *context)
959
0
{
960
  /* Go ahead and free the attributes. */
961
0
  for (; context->cur_attr >= 0; context->cur_attr--)
962
0
    {
963
0
      int pos = context->cur_attr;
964
0
      release_chunk (context, context->attr_names[pos]);
965
0
      release_chunk (context, context->attr_values[pos]);
966
0
      context->attr_names[pos] = context->attr_values[pos] = NULL;
967
0
    }
968
0
  g_assert (context->cur_attr == -1);
969
0
  g_assert (context->attr_names == NULL ||
970
0
            context->attr_names[0] == NULL);
971
0
  g_assert (context->attr_values == NULL ||
972
0
            context->attr_values[0] == NULL);
973
0
}
974
975
/* This has to be a separate function to ensure the alloca's
976
 * are unwound on exit - otherwise we grow & blow the stack
977
 * with large documents
978
 */
979
static inline void
980
emit_start_element (GMarkupParseContext  *context,
981
                    GError              **error)
982
0
{
983
0
  int i, j = 0;
984
0
  const gchar *start_name;
985
0
  const gchar **attr_names;
986
0
  const gchar **attr_values;
987
0
  GError *tmp_error;
988
989
  /* In case we want to ignore qualified tags and we see that we have
990
   * one here, we push a subparser.  This will ignore all tags inside of
991
   * the qualified tag.
992
   *
993
   * We deal with the end of the subparser from emit_end_element.
994
   */
995
0
  if ((context->flags & G_MARKUP_IGNORE_QUALIFIED) && strchr (current_element (context), ':'))
996
0
    {
997
0
      static const GMarkupParser ignore_parser = { 0 };
998
0
      g_markup_parse_context_push (context, &ignore_parser, NULL);
999
0
      clear_attributes (context);
1000
0
      return;
1001
0
    }
1002
1003
0
  attr_names = g_newa (const gchar *, context->cur_attr + 2);
1004
0
  attr_values = g_newa (const gchar *, context->cur_attr + 2);
1005
0
  for (i = 0; i < context->cur_attr + 1; i++)
1006
0
    {
1007
      /* Possibly omit qualified attribute names from the list */
1008
0
      if ((context->flags & G_MARKUP_IGNORE_QUALIFIED) && strchr (context->attr_names[i]->str, ':'))
1009
0
        continue;
1010
1011
0
      attr_names[j] = context->attr_names[i]->str;
1012
0
      attr_values[j] = context->attr_values[i]->str;
1013
0
      j++;
1014
0
    }
1015
0
  attr_names[j] = NULL;
1016
0
  attr_values[j] = NULL;
1017
1018
  /* Call user callback for element start */
1019
0
  tmp_error = NULL;
1020
0
  start_name = current_element (context);
1021
1022
0
  if (!name_validate (context, start_name, error))
1023
0
    return;
1024
1025
0
  if (context->parser->start_element)
1026
0
    (* context->parser->start_element) (context,
1027
0
                                        start_name,
1028
0
                                        (const gchar **)attr_names,
1029
0
                                        (const gchar **)attr_values,
1030
0
                                        context->user_data,
1031
0
                                        &tmp_error);
1032
1033
0
  clear_attributes (context);
1034
1035
0
  if (tmp_error != NULL)
1036
0
    propagate_error (context, error, tmp_error);
1037
0
}
1038
1039
static void
1040
emit_end_element (GMarkupParseContext  *context,
1041
                  GError              **error)
1042
0
{
1043
  /* We need to pop the tag stack and call the end_element
1044
   * function, since this is the close tag
1045
   */
1046
0
  GError *tmp_error = NULL;
1047
1048
0
  g_assert (context->tag_stack != NULL);
1049
1050
0
  possibly_finish_subparser (context);
1051
1052
  /* We might have just returned from our ignore subparser */
1053
0
  if ((context->flags & G_MARKUP_IGNORE_QUALIFIED) && strchr (current_element (context), ':'))
1054
0
    {
1055
0
      g_markup_parse_context_pop (context);
1056
0
      pop_tag (context);
1057
0
      return;
1058
0
    }
1059
1060
0
  tmp_error = NULL;
1061
0
  if (context->parser->end_element)
1062
0
    (* context->parser->end_element) (context,
1063
0
                                      current_element (context),
1064
0
                                      context->user_data,
1065
0
                                      &tmp_error);
1066
1067
0
  ensure_no_outstanding_subparser (context);
1068
1069
0
  if (tmp_error)
1070
0
    {
1071
0
      mark_error (context, tmp_error);
1072
0
      g_propagate_error (error, tmp_error);
1073
0
    }
1074
1075
0
  pop_tag (context);
1076
0
}
1077
1078
/**
1079
 * g_markup_parse_context_parse:
1080
 * @context: a #GMarkupParseContext
1081
 * @text: chunk of text to parse
1082
 * @text_len: length of @text in bytes
1083
 * @error: return location for a #GError
1084
 *
1085
 * Feed some data to the #GMarkupParseContext.
1086
 *
1087
 * The data need not be valid UTF-8; an error will be signaled if
1088
 * it's invalid. The data need not be an entire document; you can
1089
 * feed a document into the parser incrementally, via multiple calls
1090
 * to this function. Typically, as you receive data from a network
1091
 * connection or file, you feed each received chunk of data into this
1092
 * function, aborting the process if an error occurs. Once an error
1093
 * is reported, no further data may be fed to the #GMarkupParseContext;
1094
 * all errors are fatal.
1095
 *
1096
 * Returns: %FALSE if an error occurred, %TRUE on success
1097
 */
1098
gboolean
1099
g_markup_parse_context_parse (GMarkupParseContext  *context,
1100
                              const gchar          *text,
1101
                              gssize                text_len,
1102
                              GError              **error)
1103
0
{
1104
0
  g_return_val_if_fail (context != NULL, FALSE);
1105
0
  g_return_val_if_fail (text != NULL, FALSE);
1106
0
  g_return_val_if_fail (context->state != STATE_ERROR, FALSE);
1107
0
  g_return_val_if_fail (!context->parsing, FALSE);
1108
1109
0
  if (text_len < 0)
1110
0
    text_len = strlen (text);
1111
1112
0
  if (text_len == 0)
1113
0
    return TRUE;
1114
1115
0
  context->parsing = TRUE;
1116
1117
0
  context->current_text = text;
1118
0
  context->current_text_len = text_len;
1119
0
  context->current_text_end = context->current_text + text_len;
1120
0
  context->iter = context->current_text;
1121
0
  context->start = context->iter;
1122
1123
0
  while (context->iter != context->current_text_end)
1124
0
    {
1125
0
      switch (context->state)
1126
0
        {
1127
0
        case STATE_INITIAL:
1128
0
          if ((guchar) *context->iter == 0xef)
1129
0
            {
1130
0
              advance_char (context);
1131
0
              context->state = STATE_AFTER_BOM1;
1132
0
            }
1133
0
          else
1134
0
            {
1135
0
              context->state = STATE_START;
1136
0
            }
1137
0
          break;
1138
1139
0
        case STATE_AFTER_BOM1:
1140
0
          if ((guchar) *context->iter == 0xbb)
1141
0
            {
1142
0
              advance_char (context);
1143
0
              context->state = STATE_AFTER_BOM2;
1144
0
            }
1145
0
          else
1146
0
            {
1147
0
              set_error_literal (context,
1148
0
                                 error,
1149
0
                                 G_MARKUP_ERROR_PARSE,
1150
0
                                 _("Invalid byte order mark"));
1151
0
            }
1152
0
          break;
1153
1154
0
        case STATE_AFTER_BOM2:
1155
0
          if ((guchar) *context->iter == 0xbf)
1156
0
            {
1157
0
              advance_char (context);
1158
0
              context->state = STATE_START;
1159
0
            }
1160
0
          else
1161
0
            {
1162
0
              set_error_literal (context,
1163
0
                                 error,
1164
0
                                 G_MARKUP_ERROR_PARSE,
1165
0
                                 _("Invalid byte order mark"));
1166
0
            }
1167
0
          break;
1168
1169
0
        case STATE_START:
1170
          /* Possible next state: AFTER_OPEN_ANGLE */
1171
1172
0
          g_assert (context->tag_stack == NULL);
1173
1174
          /* whitespace is ignored outside of any elements */
1175
0
          skip_spaces (context);
1176
1177
0
          if (context->iter != context->current_text_end)
1178
0
            {
1179
0
              if (*context->iter == '<')
1180
0
                {
1181
                  /* Move after the open angle */
1182
0
                  advance_char (context);
1183
1184
0
                  context->state = STATE_AFTER_OPEN_ANGLE;
1185
1186
                  /* this could start a passthrough */
1187
0
                  context->start = context->iter;
1188
1189
                  /* document is now non-empty */
1190
0
                  context->document_empty = FALSE;
1191
0
                }
1192
0
              else
1193
0
                {
1194
0
                  set_error_literal (context,
1195
0
                                     error,
1196
0
                                     G_MARKUP_ERROR_PARSE,
1197
0
                                     _("Document must begin with an element (e.g. <book>)"));
1198
0
                }
1199
0
            }
1200
0
          break;
1201
1202
0
        case STATE_AFTER_OPEN_ANGLE:
1203
          /* Possible next states: INSIDE_OPEN_TAG_NAME,
1204
           *  AFTER_CLOSE_TAG_SLASH, INSIDE_PASSTHROUGH
1205
           */
1206
0
          context->tag_line = context->line_number;
1207
0
          context->tag_char = context->char_number - 1;
1208
0
          context->tag_offset = context->offset - 1;
1209
1210
0
          if (*context->iter == '?' ||
1211
0
              *context->iter == '!')
1212
0
            {
1213
              /* include < in the passthrough */
1214
0
              const gchar *openangle = "<";
1215
0
              add_to_partial (context, openangle, openangle + 1);
1216
0
              context->start = context->iter;
1217
0
              context->balance = 1;
1218
0
              context->state = STATE_INSIDE_PASSTHROUGH;
1219
0
            }
1220
0
          else if (*context->iter == '/')
1221
0
            {
1222
              /* move after it */
1223
0
              advance_char (context);
1224
1225
0
              context->state = STATE_AFTER_CLOSE_TAG_SLASH;
1226
0
            }
1227
0
          else if (!IS_COMMON_NAME_END_CHAR (*(context->iter)))
1228
0
            {
1229
0
              context->state = STATE_INSIDE_OPEN_TAG_NAME;
1230
1231
              /* start of tag name */
1232
0
              context->start = context->iter;
1233
0
            }
1234
0
          else
1235
0
            {
1236
0
              gchar buf[8];
1237
1238
0
              set_error (context,
1239
0
                         error,
1240
0
                         G_MARKUP_ERROR_PARSE,
1241
0
                         _("“%s” is not a valid character following "
1242
0
                           "a “<” character; it may not begin an "
1243
0
                           "element name"),
1244
0
                         utf8_str (context->iter,
1245
0
                                   context->current_text_end - context->iter, buf));
1246
0
            }
1247
0
          break;
1248
1249
          /* The AFTER_CLOSE_ANGLE state is actually sort of
1250
           * broken, because it doesn't correspond to a range
1251
           * of characters in the input stream as the others do,
1252
           * and thus makes things harder to conceptualize
1253
           */
1254
0
        case STATE_AFTER_CLOSE_ANGLE:
1255
          /* Possible next states: INSIDE_TEXT, STATE_START */
1256
0
          if (context->tag_stack == NULL)
1257
0
            {
1258
0
              context->start = NULL;
1259
0
              context->state = STATE_START;
1260
0
            }
1261
0
          else
1262
0
            {
1263
0
              context->start = context->iter;
1264
0
              context->state = STATE_INSIDE_TEXT;
1265
0
            }
1266
0
          break;
1267
1268
0
        case STATE_AFTER_ELISION_SLASH:
1269
          /* Possible next state: AFTER_CLOSE_ANGLE */
1270
0
          if (*context->iter == '>')
1271
0
            {
1272
              /* move after the close angle */
1273
0
              advance_char (context);
1274
0
              context->state = STATE_AFTER_CLOSE_ANGLE;
1275
0
              emit_end_element (context, error);
1276
0
            }
1277
0
          else
1278
0
            {
1279
0
              gchar buf[8];
1280
1281
0
              set_error (context,
1282
0
                         error,
1283
0
                         G_MARKUP_ERROR_PARSE,
1284
0
                         _("Odd character “%s”, expected a “>” character "
1285
0
                           "to end the empty-element tag “%s”"),
1286
0
                         utf8_str (context->iter,
1287
0
                                   context->current_text_end - context->iter, buf),
1288
0
                         current_element (context));
1289
0
            }
1290
0
          break;
1291
1292
0
        case STATE_INSIDE_OPEN_TAG_NAME:
1293
          /* Possible next states: BETWEEN_ATTRIBUTES */
1294
1295
          /* if there's a partial chunk then it's the first part of the
1296
           * tag name. If there's a context->start then it's the start
1297
           * of the tag name in current_text, the partial chunk goes
1298
           * before that start though.
1299
           */
1300
0
          advance_to_name_end (context);
1301
1302
0
          if (context->iter == context->current_text_end)
1303
0
            {
1304
              /* The name hasn't necessarily ended. Merge with
1305
               * partial chunk, leave state unchanged.
1306
               */
1307
0
              add_to_partial (context, context->start, context->iter);
1308
0
            }
1309
0
          else
1310
0
            {
1311
              /* The name has ended. Combine it with the partial chunk
1312
               * if any; push it on the stack; enter next state.
1313
               */
1314
0
              add_to_partial (context, context->start, context->iter);
1315
0
              push_partial_as_tag (context);
1316
1317
0
              context->state = STATE_BETWEEN_ATTRIBUTES;
1318
0
              context->start = NULL;
1319
0
            }
1320
0
          break;
1321
1322
0
        case STATE_INSIDE_ATTRIBUTE_NAME:
1323
          /* Possible next states: AFTER_ATTRIBUTE_NAME */
1324
1325
0
          advance_to_name_end (context);
1326
0
          add_to_partial (context, context->start, context->iter);
1327
1328
          /* read the full name, if we enter the equals sign state
1329
           * then add the attribute to the list (without the value),
1330
           * otherwise store a partial chunk to be prepended later.
1331
           */
1332
0
          if (context->iter != context->current_text_end)
1333
0
            context->state = STATE_AFTER_ATTRIBUTE_NAME;
1334
0
          break;
1335
1336
0
        case STATE_AFTER_ATTRIBUTE_NAME:
1337
          /* Possible next states: AFTER_ATTRIBUTE_EQUALS_SIGN */
1338
1339
0
          skip_spaces (context);
1340
1341
0
          if (context->iter != context->current_text_end)
1342
0
            {
1343
              /* The name has ended. Combine it with the partial chunk
1344
               * if any; push it on the stack; enter next state.
1345
               */
1346
0
              if (!name_validate (context, context->partial_chunk->str, error))
1347
0
                break;
1348
1349
0
              if (!add_attribute (context, context->partial_chunk))
1350
0
                {
1351
0
                  set_error (context,
1352
0
                             error,
1353
0
                             G_MARKUP_ERROR_PARSE,
1354
0
                             _("Too many attributes in element “%s”"),
1355
0
                             current_element (context));
1356
0
                  break;
1357
0
                }
1358
1359
0
              context->partial_chunk = NULL;
1360
0
              context->start = NULL;
1361
1362
0
              if (*context->iter == '=')
1363
0
                {
1364
0
                  advance_char (context);
1365
0
                  context->state = STATE_AFTER_ATTRIBUTE_EQUALS_SIGN;
1366
0
                }
1367
0
              else
1368
0
                {
1369
0
                  gchar buf[8];
1370
1371
0
                  set_error (context,
1372
0
                             error,
1373
0
                             G_MARKUP_ERROR_PARSE,
1374
0
                             _("Odd character “%s”, expected a “=” after "
1375
0
                               "attribute name “%s” of element “%s”"),
1376
0
                             utf8_str (context->iter,
1377
0
                                       context->current_text_end - context->iter, buf),
1378
0
                             current_attribute (context),
1379
0
                             current_element (context));
1380
1381
0
                }
1382
0
            }
1383
0
          break;
1384
1385
0
        case STATE_BETWEEN_ATTRIBUTES:
1386
          /* Possible next states: AFTER_CLOSE_ANGLE,
1387
           * AFTER_ELISION_SLASH, INSIDE_ATTRIBUTE_NAME
1388
           */
1389
0
          skip_spaces (context);
1390
1391
0
          if (context->iter != context->current_text_end)
1392
0
            {
1393
0
              if (*context->iter == '/')
1394
0
                {
1395
0
                  advance_char (context);
1396
0
                  context->state = STATE_AFTER_ELISION_SLASH;
1397
0
                }
1398
0
              else if (*context->iter == '>')
1399
0
                {
1400
0
                  advance_char (context);
1401
0
                  context->state = STATE_AFTER_CLOSE_ANGLE;
1402
0
                }
1403
0
              else if (!IS_COMMON_NAME_END_CHAR (*(context->iter)))
1404
0
                {
1405
0
                  context->state = STATE_INSIDE_ATTRIBUTE_NAME;
1406
                  /* start of attribute name */
1407
0
                  context->start = context->iter;
1408
0
                }
1409
0
              else
1410
0
                {
1411
0
                  gchar buf[8];
1412
1413
0
                  set_error (context,
1414
0
                             error,
1415
0
                             G_MARKUP_ERROR_PARSE,
1416
0
                             _("Odd character “%s”, expected a “>” or “/” "
1417
0
                               "character to end the start tag of "
1418
0
                               "element “%s”, or optionally an attribute; "
1419
0
                               "perhaps you used an invalid character in "
1420
0
                               "an attribute name"),
1421
0
                             utf8_str (context->iter,
1422
0
                                       context->current_text_end - context->iter, buf),
1423
0
                             current_element (context));
1424
0
                }
1425
1426
              /* If we're done with attributes, invoke
1427
               * the start_element callback
1428
               */
1429
0
              if (context->state == STATE_AFTER_ELISION_SLASH ||
1430
0
                  context->state == STATE_AFTER_CLOSE_ANGLE)
1431
0
                emit_start_element (context, error);
1432
0
            }
1433
0
          break;
1434
1435
0
        case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN:
1436
          /* Possible next state: INSIDE_ATTRIBUTE_VALUE_[SQ/DQ] */
1437
1438
0
          skip_spaces (context);
1439
1440
0
          if (context->iter != context->current_text_end)
1441
0
            {
1442
0
              if (*context->iter == '"')
1443
0
                {
1444
0
                  advance_char (context);
1445
0
                  context->state = STATE_INSIDE_ATTRIBUTE_VALUE_DQ;
1446
0
                  context->start = context->iter;
1447
0
                }
1448
0
              else if (*context->iter == '\'')
1449
0
                {
1450
0
                  advance_char (context);
1451
0
                  context->state = STATE_INSIDE_ATTRIBUTE_VALUE_SQ;
1452
0
                  context->start = context->iter;
1453
0
                }
1454
0
              else
1455
0
                {
1456
0
                  gchar buf[8];
1457
1458
0
                  set_error (context,
1459
0
                             error,
1460
0
                             G_MARKUP_ERROR_PARSE,
1461
0
                             _("Odd character “%s”, expected an open quote mark "
1462
0
                               "after the equals sign when giving value for "
1463
0
                               "attribute “%s” of element “%s”"),
1464
0
                             utf8_str (context->iter,
1465
0
                                       context->current_text_end - context->iter, buf),
1466
0
                             current_attribute (context),
1467
0
                             current_element (context));
1468
0
                }
1469
0
            }
1470
0
          break;
1471
1472
0
        case STATE_INSIDE_ATTRIBUTE_VALUE_SQ:
1473
0
        case STATE_INSIDE_ATTRIBUTE_VALUE_DQ:
1474
          /* Possible next states: BETWEEN_ATTRIBUTES */
1475
0
          {
1476
0
            gchar delim;
1477
1478
0
            if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ)
1479
0
              {
1480
0
                delim = '\'';
1481
0
              }
1482
0
            else
1483
0
              {
1484
0
                delim = '"';
1485
0
              }
1486
1487
0
            do
1488
0
              {
1489
0
                if (*context->iter == delim)
1490
0
                  break;
1491
0
              }
1492
0
            while (advance_char (context));
1493
0
          }
1494
0
          if (context->iter == context->current_text_end)
1495
0
            {
1496
              /* The value hasn't necessarily ended. Merge with
1497
               * partial chunk, leave state unchanged.
1498
               */
1499
0
              add_to_partial (context, context->start, context->iter);
1500
0
            }
1501
0
          else
1502
0
            {
1503
0
              gboolean is_ascii;
1504
              /* The value has ended at the quote mark. Combine it
1505
               * with the partial chunk if any; set it for the current
1506
               * attribute.
1507
               */
1508
0
              add_to_partial (context, context->start, context->iter);
1509
1510
0
              g_assert (context->cur_attr >= 0);
1511
1512
0
              if (unescape_gstring_inplace (context, context->partial_chunk, &is_ascii, error) &&
1513
0
                  (is_ascii || text_validate (context, context->partial_chunk->str,
1514
0
                                              context->partial_chunk->len, error)))
1515
0
                {
1516
                  /* success, advance past quote and set state. */
1517
0
                  context->attr_values[context->cur_attr] = context->partial_chunk;
1518
0
                  context->partial_chunk = NULL;
1519
0
                  advance_char (context);
1520
0
                  context->state = STATE_BETWEEN_ATTRIBUTES;
1521
0
                  context->start = NULL;
1522
0
                }
1523
1524
0
              truncate_partial (context);
1525
0
            }
1526
0
          break;
1527
1528
0
        case STATE_INSIDE_TEXT:
1529
          /* Possible next states: AFTER_OPEN_ANGLE */
1530
0
          do
1531
0
            {
1532
0
              if (*context->iter == '<')
1533
0
                break;
1534
0
            }
1535
0
          while (advance_char (context));
1536
1537
          /* The text hasn't necessarily ended. Merge with
1538
           * partial chunk, leave state unchanged.
1539
           */
1540
1541
0
          add_to_partial (context, context->start, context->iter);
1542
1543
0
          if (context->iter != context->current_text_end)
1544
0
            {
1545
0
              gboolean is_ascii;
1546
1547
              /* The text has ended at the open angle. Call the text
1548
               * callback.
1549
               */
1550
0
              if (unescape_gstring_inplace (context, context->partial_chunk, &is_ascii, error) &&
1551
0
                  (is_ascii || text_validate (context, context->partial_chunk->str,
1552
0
                                              context->partial_chunk->len, error)))
1553
0
                {
1554
0
                  GError *tmp_error = NULL;
1555
1556
0
                  if (context->parser->text)
1557
0
                    (*context->parser->text) (context,
1558
0
                                              context->partial_chunk->str,
1559
0
                                              context->partial_chunk->len,
1560
0
                                              context->user_data,
1561
0
                                              &tmp_error);
1562
1563
0
                  if (tmp_error == NULL)
1564
0
                    {
1565
                      /* advance past open angle and set state. */
1566
0
                      advance_char (context);
1567
0
                      context->state = STATE_AFTER_OPEN_ANGLE;
1568
                      /* could begin a passthrough */
1569
0
                      context->start = context->iter;
1570
0
                    }
1571
0
                  else
1572
0
                    propagate_error (context, error, tmp_error);
1573
0
                }
1574
1575
0
              truncate_partial (context);
1576
0
            }
1577
0
          break;
1578
1579
0
        case STATE_AFTER_CLOSE_TAG_SLASH:
1580
          /* Possible next state: INSIDE_CLOSE_TAG_NAME */
1581
0
          if (!IS_COMMON_NAME_END_CHAR (*(context->iter)))
1582
0
            {
1583
0
              context->state = STATE_INSIDE_CLOSE_TAG_NAME;
1584
1585
              /* start of tag name */
1586
0
              context->start = context->iter;
1587
0
            }
1588
0
          else
1589
0
            {
1590
0
              gchar buf[8];
1591
1592
0
              set_error (context,
1593
0
                         error,
1594
0
                         G_MARKUP_ERROR_PARSE,
1595
0
                         _("“%s” is not a valid character following "
1596
0
                           "the characters “</”; “%s” may not begin an "
1597
0
                           "element name"),
1598
0
                         utf8_str (context->iter,
1599
0
                                   context->current_text_end - context->iter, buf),
1600
0
                         utf8_str (context->iter,
1601
0
                                   context->current_text_end - context->iter, buf));
1602
0
            }
1603
0
          break;
1604
1605
0
        case STATE_INSIDE_CLOSE_TAG_NAME:
1606
          /* Possible next state: AFTER_CLOSE_TAG_NAME */
1607
0
          advance_to_name_end (context);
1608
0
          add_to_partial (context, context->start, context->iter);
1609
1610
0
          if (context->iter != context->current_text_end)
1611
0
            context->state = STATE_AFTER_CLOSE_TAG_NAME;
1612
0
          break;
1613
1614
0
        case STATE_AFTER_CLOSE_TAG_NAME:
1615
          /* Possible next state: AFTER_CLOSE_TAG_SLASH */
1616
1617
0
          skip_spaces (context);
1618
1619
0
          if (context->iter != context->current_text_end)
1620
0
            {
1621
0
              GString *close_name;
1622
1623
0
              close_name = context->partial_chunk;
1624
0
              context->partial_chunk = NULL;
1625
1626
0
              if (*context->iter != '>')
1627
0
                {
1628
0
                  gchar buf[8];
1629
1630
0
                  set_error (context,
1631
0
                             error,
1632
0
                             G_MARKUP_ERROR_PARSE,
1633
0
                             _("“%s” is not a valid character following "
1634
0
                               "the close element name “%s”; the allowed "
1635
0
                               "character is “>”"),
1636
0
                             utf8_str (context->iter,
1637
0
                                       context->current_text_end - context->iter, buf),
1638
0
                             close_name->str);
1639
0
                }
1640
0
              else if (context->tag_stack == NULL)
1641
0
                {
1642
0
                  set_error (context,
1643
0
                             error,
1644
0
                             G_MARKUP_ERROR_PARSE,
1645
0
                             _("Element “%s” was closed, no element "
1646
0
                               "is currently open"),
1647
0
                             close_name->str);
1648
0
                }
1649
0
              else if (strcmp (close_name->str, current_element (context)) != 0)
1650
0
                {
1651
0
                  set_error (context,
1652
0
                             error,
1653
0
                             G_MARKUP_ERROR_PARSE,
1654
0
                             _("Element “%s” was closed, but the currently "
1655
0
                               "open element is “%s”"),
1656
0
                             close_name->str,
1657
0
                             current_element (context));
1658
0
                }
1659
0
              else
1660
0
                {
1661
0
                  advance_char (context);
1662
0
                  context->state = STATE_AFTER_CLOSE_ANGLE;
1663
0
                  context->start = NULL;
1664
1665
0
                  emit_end_element (context, error);
1666
0
                }
1667
0
              context->partial_chunk = close_name;
1668
0
              truncate_partial (context);
1669
0
            }
1670
0
          break;
1671
1672
0
        case STATE_INSIDE_PASSTHROUGH:
1673
          /* Possible next state: AFTER_CLOSE_ANGLE */
1674
0
          do
1675
0
            {
1676
0
              if (*context->iter == '<')
1677
0
                context->balance++;
1678
0
              if (*context->iter == '>')
1679
0
                {
1680
0
                  gchar *str;
1681
0
                  gsize len;
1682
1683
0
                  context->balance--;
1684
0
                  add_to_partial (context, context->start, context->iter);
1685
0
                  context->start = context->iter;
1686
1687
0
                  str = context->partial_chunk->str;
1688
0
                  len = context->partial_chunk->len;
1689
1690
0
                  if (str[1] == '?' && str[len - 1] == '?')
1691
0
                    break;
1692
0
                  if (strncmp (str, "<!--", 4) == 0 &&
1693
0
                      strcmp (str + len - 2, "--") == 0)
1694
0
                    break;
1695
0
                  if (strncmp (str, "<![CDATA[", 9) == 0 &&
1696
0
                      strcmp (str + len - 2, "]]") == 0)
1697
0
                    break;
1698
0
                  if (strncmp (str, "<!DOCTYPE", 9) == 0 &&
1699
0
                      context->balance == 0)
1700
0
                    break;
1701
0
                }
1702
0
            }
1703
0
          while (advance_char (context));
1704
1705
0
          if (context->iter == context->current_text_end)
1706
0
            {
1707
              /* The passthrough hasn't necessarily ended. Merge with
1708
               * partial chunk, leave state unchanged.
1709
               */
1710
0
               add_to_partial (context, context->start, context->iter);
1711
0
            }
1712
0
          else
1713
0
            {
1714
              /* The passthrough has ended at the close angle. Combine
1715
               * it with the partial chunk if any. Call the passthrough
1716
               * callback. Note that the open/close angles are
1717
               * included in the text of the passthrough.
1718
               */
1719
0
              GError *tmp_error = NULL;
1720
1721
0
              advance_char (context); /* advance past close angle */
1722
0
              add_to_partial (context, context->start, context->iter);
1723
1724
0
              if (context->flags & G_MARKUP_TREAT_CDATA_AS_TEXT &&
1725
0
                  strncmp (context->partial_chunk->str, "<![CDATA[", 9) == 0)
1726
0
                {
1727
0
                  if (context->parser->text &&
1728
0
                      text_validate (context,
1729
0
                                     context->partial_chunk->str + 9,
1730
0
                                     context->partial_chunk->len - 12,
1731
0
                                     error))
1732
0
                    (*context->parser->text) (context,
1733
0
                                              context->partial_chunk->str + 9,
1734
0
                                              context->partial_chunk->len - 12,
1735
0
                                              context->user_data,
1736
0
                                              &tmp_error);
1737
0
                }
1738
0
              else if (context->parser->passthrough &&
1739
0
                       text_validate (context,
1740
0
                                      context->partial_chunk->str,
1741
0
                                      context->partial_chunk->len,
1742
0
                                      error))
1743
0
                (*context->parser->passthrough) (context,
1744
0
                                                 context->partial_chunk->str,
1745
0
                                                 context->partial_chunk->len,
1746
0
                                                 context->user_data,
1747
0
                                                 &tmp_error);
1748
1749
0
              truncate_partial (context);
1750
1751
0
              if (tmp_error == NULL)
1752
0
                {
1753
0
                  context->state = STATE_AFTER_CLOSE_ANGLE;
1754
0
                  context->start = context->iter; /* could begin text */
1755
0
                }
1756
0
              else
1757
0
                propagate_error (context, error, tmp_error);
1758
0
            }
1759
0
          break;
1760
1761
0
        case STATE_ERROR:
1762
0
          goto finished;
1763
0
          break;
1764
1765
0
        default:
1766
0
          g_assert_not_reached ();
1767
0
          break;
1768
0
        }
1769
0
    }
1770
1771
0
 finished:
1772
0
  context->parsing = FALSE;
1773
1774
0
  return context->state != STATE_ERROR;
1775
0
}
1776
1777
/**
1778
 * g_markup_parse_context_end_parse:
1779
 * @context: a #GMarkupParseContext
1780
 * @error: return location for a #GError
1781
 *
1782
 * Signals to the #GMarkupParseContext that all data has been
1783
 * fed into the parse context with g_markup_parse_context_parse().
1784
 *
1785
 * This function reports an error if the document isn't complete,
1786
 * for example if elements are still open.
1787
 *
1788
 * Returns: %TRUE on success, %FALSE if an error was set
1789
 */
1790
gboolean
1791
g_markup_parse_context_end_parse (GMarkupParseContext  *context,
1792
                                  GError              **error)
1793
0
{
1794
0
  g_return_val_if_fail (context != NULL, FALSE);
1795
0
  g_return_val_if_fail (!context->parsing, FALSE);
1796
0
  g_return_val_if_fail (context->state != STATE_ERROR, FALSE);
1797
1798
0
  if (context->partial_chunk != NULL)
1799
0
    {
1800
0
      g_string_free (context->partial_chunk, TRUE);
1801
0
      context->partial_chunk = NULL;
1802
0
    }
1803
1804
0
  if (context->document_empty)
1805
0
    {
1806
0
      set_error_literal (context, error, G_MARKUP_ERROR_EMPTY,
1807
0
                         _("Document was empty or contained only whitespace"));
1808
0
      return FALSE;
1809
0
    }
1810
1811
0
  context->parsing = TRUE;
1812
1813
0
  switch (context->state)
1814
0
    {
1815
0
    case STATE_START:
1816
      /* Nothing to do */
1817
0
      break;
1818
1819
0
    case STATE_AFTER_OPEN_ANGLE:
1820
0
      set_error_literal (context, error, G_MARKUP_ERROR_PARSE,
1821
0
                         _("Document ended unexpectedly just after an open angle bracket “<”"));
1822
0
      break;
1823
1824
0
    case STATE_AFTER_CLOSE_ANGLE:
1825
0
      if (context->tag_stack != NULL)
1826
0
        {
1827
          /* Error message the same as for INSIDE_TEXT */
1828
0
          set_error (context, error, G_MARKUP_ERROR_PARSE,
1829
0
                     _("Document ended unexpectedly with elements still open — "
1830
0
                       "“%s” was the last element opened"),
1831
0
                     current_element (context));
1832
0
        }
1833
0
      break;
1834
1835
0
    case STATE_AFTER_ELISION_SLASH:
1836
0
      set_error (context, error, G_MARKUP_ERROR_PARSE,
1837
0
                 _("Document ended unexpectedly, expected to see a close angle "
1838
0
                   "bracket ending the tag <%s/>"), current_element (context));
1839
0
      break;
1840
1841
0
    case STATE_INSIDE_OPEN_TAG_NAME:
1842
0
      set_error_literal (context, error, G_MARKUP_ERROR_PARSE,
1843
0
                         _("Document ended unexpectedly inside an element name"));
1844
0
      break;
1845
1846
0
    case STATE_INSIDE_ATTRIBUTE_NAME:
1847
0
    case STATE_AFTER_ATTRIBUTE_NAME:
1848
0
      set_error_literal (context, error, G_MARKUP_ERROR_PARSE,
1849
0
                         _("Document ended unexpectedly inside an attribute name"));
1850
0
      break;
1851
1852
0
    case STATE_BETWEEN_ATTRIBUTES:
1853
0
      set_error_literal (context, error, G_MARKUP_ERROR_PARSE,
1854
0
                         _("Document ended unexpectedly inside an element-opening "
1855
0
                           "tag."));
1856
0
      break;
1857
1858
0
    case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN:
1859
0
      set_error_literal (context, error, G_MARKUP_ERROR_PARSE,
1860
0
                         _("Document ended unexpectedly after the equals sign "
1861
0
                           "following an attribute name; no attribute value"));
1862
0
      break;
1863
1864
0
    case STATE_INSIDE_ATTRIBUTE_VALUE_SQ:
1865
0
    case STATE_INSIDE_ATTRIBUTE_VALUE_DQ:
1866
0
      set_error_literal (context, error, G_MARKUP_ERROR_PARSE,
1867
0
                         _("Document ended unexpectedly while inside an attribute "
1868
0
                           "value"));
1869
0
      break;
1870
1871
0
    case STATE_INSIDE_TEXT:
1872
0
      g_assert (context->tag_stack != NULL);
1873
0
      set_error (context, error, G_MARKUP_ERROR_PARSE,
1874
0
                 _("Document ended unexpectedly with elements still open — "
1875
0
                   "“%s” was the last element opened"),
1876
0
                 current_element (context));
1877
0
      break;
1878
1879
0
    case STATE_AFTER_CLOSE_TAG_SLASH:
1880
0
    case STATE_INSIDE_CLOSE_TAG_NAME:
1881
0
    case STATE_AFTER_CLOSE_TAG_NAME:
1882
0
      if (context->tag_stack != NULL)
1883
0
        set_error (context, error, G_MARKUP_ERROR_PARSE,
1884
0
                   _("Document ended unexpectedly inside the close tag for "
1885
0
                     "element “%s”"), current_element (context));
1886
0
      else
1887
0
        set_error (context, error, G_MARKUP_ERROR_PARSE,
1888
0
                   _("Document ended unexpectedly inside the close tag for an "
1889
0
                     "unopened element"));
1890
0
      break;
1891
1892
0
    case STATE_INSIDE_PASSTHROUGH:
1893
0
      set_error_literal (context, error, G_MARKUP_ERROR_PARSE,
1894
0
                         _("Document ended unexpectedly inside a comment or "
1895
0
                           "processing instruction"));
1896
0
      break;
1897
1898
0
    case STATE_ERROR:
1899
0
    default:
1900
0
      g_assert_not_reached ();
1901
0
      break;
1902
0
    }
1903
1904
0
  context->parsing = FALSE;
1905
1906
0
  return context->state != STATE_ERROR;
1907
0
}
1908
1909
/**
1910
 * g_markup_parse_context_get_element:
1911
 * @context: a #GMarkupParseContext
1912
 *
1913
 * Retrieves the name of the currently open element.
1914
 *
1915
 * If called from the start_element or end_element handlers this will
1916
 * give the element_name as passed to those functions. For the parent
1917
 * elements, see g_markup_parse_context_get_element_stack().
1918
 *
1919
 * Returns: the name of the currently open element, or %NULL
1920
 *
1921
 * Since: 2.2
1922
 */
1923
const gchar *
1924
g_markup_parse_context_get_element (GMarkupParseContext *context)
1925
0
{
1926
0
  g_return_val_if_fail (context != NULL, NULL);
1927
1928
0
  if (context->tag_stack == NULL)
1929
0
    return NULL;
1930
0
  else
1931
0
    return current_element (context);
1932
0
}
1933
1934
/**
1935
 * g_markup_parse_context_get_element_stack:
1936
 * @context: a #GMarkupParseContext
1937
 *
1938
 * Retrieves the element stack from the internal state of the parser.
1939
 *
1940
 * The returned #GSList is a list of strings where the first item is
1941
 * the currently open tag (as would be returned by
1942
 * g_markup_parse_context_get_element()) and the next item is its
1943
 * immediate parent.
1944
 *
1945
 * This function is intended to be used in the start_element and
1946
 * end_element handlers where g_markup_parse_context_get_element()
1947
 * would merely return the name of the element that is being
1948
 * processed.
1949
 *
1950
 * Returns: (element-type utf8): the element stack, which must not be modified
1951
 *
1952
 * Since: 2.16
1953
 */
1954
const GSList *
1955
g_markup_parse_context_get_element_stack (GMarkupParseContext *context)
1956
0
{
1957
0
  g_return_val_if_fail (context != NULL, NULL);
1958
0
  return context->tag_stack;
1959
0
}
1960
1961
/**
1962
 * g_markup_parse_context_get_position:
1963
 * @context: a #GMarkupParseContext
1964
 * @line_number: (out) (optional): return location for a line number, or %NULL
1965
 * @char_number: (out) (optional): return location for a char-on-line number, or %NULL
1966
 *
1967
 * Retrieves the current line number and the number of the character on
1968
 * that line. Intended for use in error messages; there are no strict
1969
 * semantics for what constitutes the "current" line number other than
1970
 * "the best number we could come up with for error messages."
1971
 */
1972
void
1973
g_markup_parse_context_get_position (GMarkupParseContext *context,
1974
                                     gint                *line_number,
1975
                                     gint                *char_number)
1976
0
{
1977
0
  g_return_if_fail (context != NULL);
1978
1979
0
  if (line_number)
1980
0
    *line_number = context->line_number;
1981
1982
0
  if (char_number)
1983
0
    *char_number = context->char_number;
1984
0
}
1985
1986
/**
1987
 * g_markup_parse_context_get_offset:
1988
 * @context: a #GMarkupParseContext
1989
 *
1990
 * Retrieves the current offset from the beginning of the document,
1991
 * in bytes.
1992
 *
1993
 * The information is meant to accompany the values returned by
1994
 * [method@GLib.MarkupParseContext.get_position], and comes with the
1995
 * same accuracy guarantees.
1996
 *
1997
 * Returns: the offset
1998
 *
1999
 * Since: 2.88
2000
 */
2001
gsize
2002
g_markup_parse_context_get_offset (GMarkupParseContext *context)
2003
0
{
2004
0
  g_return_val_if_fail (context != NULL, 0);
2005
2006
0
  return context->offset;
2007
0
}
2008
2009
/**
2010
 * g_markup_parse_context_get_tag_start:
2011
 * @context: a #GMarkupParseContext
2012
 * @line_number: (out): return location for the line number
2013
 * @char_number: (out): return location for the character number
2014
 * @offset: (out): return location for offset from the beginning of the document
2015
 *
2016
 * Retrieves the start position of the current start or end tag.
2017
 *
2018
 * This function can be used in the `start_element` or `end_element`
2019
 * callbacks to obtain location information for error reporting.
2020
 *
2021
 * Note that @line_number and @char_number are intended for human
2022
 * readable error messages and are therefore 1-based and in Unicode
2023
 * characters. @offset on the other hand is meant for programmatic
2024
 * use, and thus is 0-based and in bytes.
2025
 *
2026
 * The information is meant to accompany the values returned by
2027
 * [method@GLib.MarkupParseContext.get_position], and comes with the
2028
 * same accuracy guarantees.
2029
 *
2030
 * Since: 2.88
2031
 */
2032
void
2033
g_markup_parse_context_get_tag_start (GMarkupParseContext *context,
2034
                                      gsize               *line_number,
2035
                                      gsize               *char_number,
2036
                                      gsize               *offset)
2037
0
{
2038
0
  g_return_if_fail (context != NULL);
2039
0
  g_return_if_fail (line_number != NULL);
2040
0
  g_return_if_fail (char_number != NULL);
2041
0
  g_return_if_fail (offset != NULL);
2042
2043
0
  *line_number = context->tag_line;
2044
0
  *char_number = context->tag_char;
2045
0
  *offset = context->tag_offset;
2046
0
}
2047
2048
/**
2049
 * g_markup_parse_context_get_user_data:
2050
 * @context: a #GMarkupParseContext
2051
 *
2052
 * Returns the user_data associated with @context.
2053
 *
2054
 * This will either be the user_data that was provided to
2055
 * g_markup_parse_context_new() or to the most recent call
2056
 * of g_markup_parse_context_push().
2057
 *
2058
 * Returns: the provided user_data. The returned data belongs to
2059
 *     the markup context and will be freed when
2060
 *     g_markup_parse_context_free() is called.
2061
 *
2062
 * Since: 2.18
2063
 */
2064
gpointer
2065
g_markup_parse_context_get_user_data (GMarkupParseContext *context)
2066
0
{
2067
0
  return context->user_data;
2068
0
}
2069
2070
/**
2071
 * g_markup_parse_context_push:
2072
 * @context: a #GMarkupParseContext
2073
 * @parser: a #GMarkupParser
2074
 * @user_data: user data to pass to #GMarkupParser functions
2075
 *
2076
 * Temporarily redirects markup data to a sub-parser.
2077
 *
2078
 * This function may only be called from the start_element handler of
2079
 * a #GMarkupParser. It must be matched with a corresponding call to
2080
 * g_markup_parse_context_pop() in the matching end_element handler
2081
 * (except in the case that the parser aborts due to an error).
2082
 *
2083
 * All tags, text and other data between the matching tags is
2084
 * redirected to the subparser given by @parser. @user_data is used
2085
 * as the user_data for that parser. @user_data is also passed to the
2086
 * error callback in the event that an error occurs. This includes
2087
 * errors that occur in subparsers of the subparser.
2088
 *
2089
 * The end tag matching the start tag for which this call was made is
2090
 * handled by the previous parser (which is given its own user_data)
2091
 * which is why g_markup_parse_context_pop() is provided to allow "one
2092
 * last access" to the @user_data provided to this function. In the
2093
 * case of error, the @user_data provided here is passed directly to
2094
 * the error callback of the subparser and g_markup_parse_context_pop()
2095
 * should not be called. In either case, if @user_data was allocated
2096
 * then it ought to be freed from both of these locations.
2097
 *
2098
 * This function is not intended to be directly called by users
2099
 * interested in invoking subparsers. Instead, it is intended to be
2100
 * used by the subparsers themselves to implement a higher-level
2101
 * interface.
2102
 *
2103
 * As an example, see the following implementation of a simple
2104
 * parser that counts the number of tags encountered.
2105
 *
2106
 * |[<!-- language="C" --> 
2107
 * typedef struct
2108
 * {
2109
 *   gint tag_count;
2110
 * } CounterData;
2111
 *
2112
 * static void
2113
 * counter_start_element (GMarkupParseContext  *context,
2114
 *                        const gchar          *element_name,
2115
 *                        const gchar         **attribute_names,
2116
 *                        const gchar         **attribute_values,
2117
 *                        gpointer              user_data,
2118
 *                        GError              **error)
2119
 * {
2120
 *   CounterData *data = user_data;
2121
 *
2122
 *   data->tag_count++;
2123
 * }
2124
 *
2125
 * static void
2126
 * counter_error (GMarkupParseContext *context,
2127
 *                GError              *error,
2128
 *                gpointer             user_data)
2129
 * {
2130
 *   CounterData *data = user_data;
2131
 *
2132
 *   g_slice_free (CounterData, data);
2133
 * }
2134
 *
2135
 * static GMarkupParser counter_subparser =
2136
 * {
2137
 *   counter_start_element,
2138
 *   NULL,
2139
 *   NULL,
2140
 *   NULL,
2141
 *   counter_error
2142
 * };
2143
 * ]|
2144
 *
2145
 * In order to allow this parser to be easily used as a subparser, the
2146
 * following interface is provided:
2147
 *
2148
 * |[<!-- language="C" --> 
2149
 * void
2150
 * start_counting (GMarkupParseContext *context)
2151
 * {
2152
 *   CounterData *data = g_slice_new (CounterData);
2153
 *
2154
 *   data->tag_count = 0;
2155
 *   g_markup_parse_context_push (context, &counter_subparser, data);
2156
 * }
2157
 *
2158
 * gint
2159
 * end_counting (GMarkupParseContext *context)
2160
 * {
2161
 *   CounterData *data = g_markup_parse_context_pop (context);
2162
 *   int result;
2163
 *
2164
 *   result = data->tag_count;
2165
 *   g_slice_free (CounterData, data);
2166
 *
2167
 *   return result;
2168
 * }
2169
 * ]|
2170
 *
2171
 * The subparser would then be used as follows:
2172
 *
2173
 * |[<!-- language="C" --> 
2174
 * static void start_element (context, element_name, ...)
2175
 * {
2176
 *   if (strcmp (element_name, "count-these") == 0)
2177
 *     start_counting (context);
2178
 *
2179
 *   // else, handle other tags...
2180
 * }
2181
 *
2182
 * static void end_element (context, element_name, ...)
2183
 * {
2184
 *   if (strcmp (element_name, "count-these") == 0)
2185
 *     g_print ("Counted %d tags\n", end_counting (context));
2186
 *
2187
 *   // else, handle other tags...
2188
 * }
2189
 * ]|
2190
 *
2191
 * Since: 2.18
2192
 **/
2193
void
2194
g_markup_parse_context_push (GMarkupParseContext *context,
2195
                             const GMarkupParser *parser,
2196
                             gpointer             user_data)
2197
0
{
2198
0
  GMarkupRecursionTracker *tracker;
2199
2200
0
  tracker = g_slice_new (GMarkupRecursionTracker);
2201
0
  tracker->prev_element = context->subparser_element;
2202
0
  tracker->prev_parser = context->parser;
2203
0
  tracker->prev_user_data = context->user_data;
2204
2205
0
  context->subparser_element = current_element (context);
2206
0
  context->parser = parser;
2207
0
  context->user_data = user_data;
2208
2209
0
  context->subparser_stack = g_slist_prepend (context->subparser_stack,
2210
0
                                              tracker);
2211
0
}
2212
2213
/**
2214
 * g_markup_parse_context_pop:
2215
 * @context: a #GMarkupParseContext
2216
 *
2217
 * Completes the process of a temporary sub-parser redirection.
2218
 *
2219
 * This function exists to collect the user_data allocated by a
2220
 * matching call to g_markup_parse_context_push(). It must be called
2221
 * in the end_element handler corresponding to the start_element
2222
 * handler during which g_markup_parse_context_push() was called.
2223
 * You must not call this function from the error callback -- the
2224
 * @user_data is provided directly to the callback in that case.
2225
 *
2226
 * This function is not intended to be directly called by users
2227
 * interested in invoking subparsers. Instead, it is intended to
2228
 * be used by the subparsers themselves to implement a higher-level
2229
 * interface.
2230
 *
2231
 * Returns: the user data passed to g_markup_parse_context_push()
2232
 *
2233
 * Since: 2.18
2234
 */
2235
gpointer
2236
g_markup_parse_context_pop (GMarkupParseContext *context)
2237
0
{
2238
0
  gpointer user_data;
2239
2240
0
  if (!context->awaiting_pop)
2241
0
    possibly_finish_subparser (context);
2242
2243
0
  g_assert (context->awaiting_pop);
2244
2245
0
  context->awaiting_pop = FALSE;
2246
2247
  /* valgrind friendliness */
2248
0
  user_data = context->held_user_data;
2249
0
  context->held_user_data = NULL;
2250
2251
0
  return user_data;
2252
0
}
2253
2254
#define APPEND_TEXT_AND_SEEK(_str, _start, _end)          \
2255
160M
  G_STMT_START {                                          \
2256
160M
    if (_end > _start)                                    \
2257
160M
      g_string_append_len (_str, _start, _end - _start);  \
2258
160M
    _start = ++_end;                                      \
2259
160M
  } G_STMT_END
2260
2261
/*
2262
 * https://www.w3.org/TR/REC-xml/ defines the set of valid
2263
 * characters as:
2264
 *   #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
2265
 *
2266
 * That is, from non-ASCII UTF-8 character set, only 0xC27F - 0xC284 and
2267
 * 0xC286 - 0xC29F have to be escaped (excluding the surrogate blocks).
2268
 * Corresponding Unicode code points are [0x7F-0x84] and [0x86-0x9F].
2269
 *
2270
 * So instead of using costly g_utf8_next_char or similar UTF8 functions, it's
2271
 * better to read each byte, and make an exception for 0xC2XX.
2272
 */
2273
static void
2274
append_escaped_text (GString     *str,
2275
                     const gchar *text,
2276
                     gssize       length)
2277
3.70M
{
2278
3.70M
  const gchar *p, *pending;
2279
3.70M
  const gchar *end;
2280
2281
3.70M
  p = pending = text;
2282
3.70M
  end = text + length;
2283
2284
340M
  while (p < end && pending < end)
2285
336M
    {
2286
336M
      guchar c = (guchar) *pending;
2287
2288
336M
      switch (c)
2289
336M
        {
2290
91.7M
        case '&':
2291
91.7M
          APPEND_TEXT_AND_SEEK (str, p, pending);
2292
91.7M
          g_string_append (str, "&amp;");
2293
91.7M
          break;
2294
2295
33.3M
        case '<':
2296
33.3M
          APPEND_TEXT_AND_SEEK (str, p, pending);
2297
33.3M
          g_string_append (str, "&lt;");
2298
33.3M
          break;
2299
2300
10.0M
        case '>':
2301
10.0M
          APPEND_TEXT_AND_SEEK (str, p, pending);
2302
10.0M
          g_string_append (str, "&gt;");
2303
10.0M
          break;
2304
2305
3.09M
        case '\'':
2306
3.09M
          APPEND_TEXT_AND_SEEK (str, p, pending);
2307
3.09M
          g_string_append (str, "&apos;");
2308
3.09M
          break;
2309
2310
2.53M
        case '"':
2311
2.53M
          APPEND_TEXT_AND_SEEK (str, p, pending);
2312
2.53M
          g_string_append (str, "&quot;");
2313
2.53M
          break;
2314
2315
196M
        default:
2316
196M
          if ((0x1 <= c && c <= 0x8) ||
2317
192M
              (0xb <= c && c  <= 0xc) ||
2318
192M
              (0xe <= c && c <= 0x1f) ||
2319
176M
              (c == 0x7f))
2320
19.8M
            {
2321
19.8M
              APPEND_TEXT_AND_SEEK (str, p, pending);
2322
19.8M
              g_string_append_printf (str, "&#x%x;", c);
2323
19.8M
            }
2324
          /* The utf-8 control characters to escape begins with 0xc2 byte */
2325
176M
          else if (c == 0xc2)
2326
156k
            {
2327
156k
              gunichar u = g_utf8_get_char (pending);
2328
2329
156k
              if ((0x7f < u && u <= 0x84) ||
2330
151k
                  (0x86 <= u && u <= 0x9f))
2331
26.3k
                {
2332
26.3k
                  APPEND_TEXT_AND_SEEK (str, p, pending);
2333
26.3k
                  g_string_append_printf (str, "&#x%x;", u);
2334
2335
                  /*
2336
                   * We have appended a two byte character above, which
2337
                   * is one byte ahead of what we read on every loop.
2338
                   * Increment to skip 0xc2 and point to the right location.
2339
                   */
2340
26.3k
                  p++;
2341
26.3k
                }
2342
129k
              else
2343
129k
                pending++;
2344
156k
            }
2345
176M
          else
2346
176M
            pending++;
2347
196M
          break;
2348
336M
        }
2349
336M
    }
2350
2351
3.70M
  if (pending > p)
2352
1.11M
    g_string_append_len (str, p, pending - p);
2353
3.70M
}
2354
2355
#undef APPEND_TEXT_AND_SEEK
2356
2357
/**
2358
 * g_markup_escape_text:
2359
 * @text: some valid UTF-8 text
2360
 * @length: length of @text in bytes, or -1 if the text is nul-terminated
2361
 *
2362
 * Escapes text so that the markup parser will parse it verbatim.
2363
 * Less than, greater than, ampersand, etc. are replaced with the
2364
 * corresponding entities. This function would typically be used
2365
 * when writing out a file to be parsed with the markup parser.
2366
 *
2367
 * Note that this function doesn't protect whitespace and line endings
2368
 * from being processed according to the XML rules for normalization
2369
 * of line endings and attribute values.
2370
 *
2371
 * Note also that this function will produce character references in
2372
 * the range of &#x1; ... &#x1f; for all control sequences
2373
 * except for tabstop, newline and carriage return.  The character
2374
 * references in this range are not valid XML 1.0, but they are
2375
 * valid XML 1.1 and will be accepted by the GMarkup parser.
2376
 *
2377
 * Returns: a newly allocated string with the escaped text
2378
 */
2379
gchar*
2380
g_markup_escape_text (const gchar *text,
2381
                      gssize       length)
2382
3.70M
{
2383
3.70M
  GString *str;
2384
2385
3.70M
  g_return_val_if_fail (text != NULL, NULL);
2386
2387
3.70M
  if (length < 0)
2388
3.70M
    length = strlen (text);
2389
2390
  /* prealloc at least as long as original text */
2391
3.70M
  str = g_string_sized_new (length);
2392
3.70M
  append_escaped_text (str, text, length);
2393
2394
3.70M
  return g_string_free (str, FALSE);
2395
3.70M
}
2396
2397
/*
 * skip_positional_argument:
 * @cp: position inside a printf-style conversion
 *
 * If @cp points at a positional argument reference (one or more
 * decimal digits followed by '$'), returns the position just after
 * the '$'. Otherwise returns @cp unchanged.
 */
static const char *
skip_positional_argument (const char *cp)
{
  const char *np = cp;

  while (*np >= '0' && *np <= '9')
    np++;

  return (np != cp && *np == '$') ? np + 1 : cp;
}

/*
 * find_conversion:
 * @format: a printf-style format string
 * @after: location to store a pointer to the character after
 *     the returned conversion. On a %NULL return, returns the
 *     pointer to the trailing NUL in the string
 *
 * Find the next conversion in a printf-style format string.
 * Partially based on code from printf-parser.c,
 * Copyright (C) 1999-2000, 2002-2003 Free Software Foundation, Inc.
 *
 * A conversion that is cut off by the end of the string (a lone
 * trailing '%', or e.g. "%5" with no conversion character) is not
 * reported as a conversion: %NULL is returned and @after is set to
 * the trailing NUL, so callers never advance past the terminator.
 *
 * Returns: pointer to the next conversion in @format,
 *  or %NULL, if none.
 */
static const char *
find_conversion (const char  *format,
                 const char **after)
{
  const char *start = format;
  const char *cp;

  while (*start != '\0' && *start != '%')
    start++;

  if (*start == '\0')
    {
      *after = start;
      return NULL;
    }

  cp = start + 1;

  if (*cp == '\0')
    {
      *after = cp;
      return NULL;
    }

  /* Test for positional argument.  */
  cp = skip_positional_argument (cp);

  /* Skip the flags.  */
  while (*cp == '\'' ||
         *cp == '-' ||
         *cp == '+' ||
         *cp == ' ' ||
         *cp == '#' ||
         *cp == '0')
    cp++;

  /* Skip the field width.  */
  if (*cp == '*')
    {
      cp++;

      /* Test for positional argument.  */
      cp = skip_positional_argument (cp);
    }
  else
    {
      while (*cp >= '0' && *cp <= '9')
        cp++;
    }

  /* Skip the precision.  */
  if (*cp == '.')
    {
      cp++;
      if (*cp == '*')
        {
          /* Step over the '*' itself; without this the '*' would be
           * taken for the conversion character and "%.*s" would be
           * cut off before the 's'.
           */
          cp++;

          /* Test for positional argument.  */
          cp = skip_positional_argument (cp);
        }
      else
        {
          while (*cp >= '0' && *cp <= '9')
            cp++;
        }
    }

  /* Skip argument type/size specifiers.  */
  while (*cp == 'h' ||
         *cp == 'L' ||
         *cp == 'l' ||
         *cp == 'j' ||
         *cp == 'z' ||
         *cp == 'Z' ||
         *cp == 't')
    cp++;

  /* The string ended before a conversion character was seen: treat
   * it like a lone trailing '%' instead of stepping over the NUL.
   */
  if (*cp == '\0')
    {
      *after = cp;
      return NULL;
    }

  /* Skip the conversion character.  */
  cp++;

  *after = cp;
  return start;
}
2522
2523
/**
2524
 * g_markup_vprintf_escaped:
2525
 * @format: printf() style format string
2526
 * @args: variable argument list, similar to vprintf()
2527
 *
2528
 * Formats the data in @args according to @format, escaping
2529
 * all string and character arguments in the fashion
2530
 * of g_markup_escape_text(). See g_markup_printf_escaped().
2531
 *
2532
 * Returns: newly allocated result from formatting
2533
 *  operation. Free with g_free().
2534
 *
2535
 * Since: 2.4
2536
 */
2537
#pragma GCC diagnostic push
2538
#pragma GCC diagnostic ignored "-Wformat-nonliteral"
2539
2540
gchar *
2541
g_markup_vprintf_escaped (const gchar *format,
2542
                          va_list      args)
2543
0
{
2544
0
  GString *format1;
2545
0
  GString *format2;
2546
0
  GString *result = NULL;
2547
0
  gchar *output1 = NULL;
2548
0
  gchar *output2 = NULL;
2549
0
  const char *p, *op1, *op2;
2550
0
  va_list args2;
2551
2552
  /* The technique here, is that we make two format strings that
2553
   * have the identical conversions in the identical order to the
2554
   * original strings, but differ in the text in-between. We
2555
   * then use the normal g_strdup_vprintf() to format the arguments
2556
   * with the two new format strings. By comparing the results,
2557
   * we can figure out what segments of the output come from
2558
   * the original format string, and what from the arguments,
2559
   * and thus know what portions of the string to escape.
2560
   *
2561
   * For instance, for:
2562
   *
2563
   *  g_markup_printf_escaped ("%s ate %d apples", "Susan & Fred", 5);
2564
   *
2565
   * We form the two format strings "%sX%dX" and %sY%sY". The results
2566
   * of formatting with those two strings are
2567
   *
2568
   * "%sX%dX" => "Susan & FredX5X"
2569
   * "%sY%dY" => "Susan & FredY5Y"
2570
   *
2571
   * To find the span of the first argument, we find the first position
2572
   * where the two arguments differ, which tells us that the first
2573
   * argument formatted to "Susan & Fred". We then escape that
2574
   * to "Susan & Fred" and join up with the intermediate portions
2575
   * of the format string and the second argument to get
2576
   * "Susan & Fred ate 5 apples".
2577
   */
2578
2579
  /* Create the two modified format strings
2580
   */
2581
0
  format1 = g_string_new (NULL);
2582
0
  format2 = g_string_new (NULL);
2583
0
  p = format;
2584
0
  while (TRUE)
2585
0
    {
2586
0
      const char *after;
2587
0
      const char *conv = find_conversion (p, &after);
2588
0
      if (!conv)
2589
0
        break;
2590
2591
0
      g_string_append_len (format1, conv, after - conv);
2592
0
      g_string_append_c (format1, 'X');
2593
0
      g_string_append_len (format2, conv, after - conv);
2594
0
      g_string_append_c (format2, 'Y');
2595
2596
0
      p = after;
2597
0
    }
2598
2599
  /* Use them to format the arguments
2600
   */
2601
0
  va_copy (args2, args);
2602
2603
0
  output1 = g_strdup_vprintf (format1->str, args);
2604
2605
0
  if (!output1)
2606
0
    {
2607
0
      va_end (args2);
2608
0
      goto cleanup;
2609
0
    }
2610
2611
0
  output2 = g_strdup_vprintf (format2->str, args2);
2612
0
  va_end (args2);
2613
0
  if (!output2)
2614
0
    goto cleanup;
2615
0
  result = g_string_new (NULL);
2616
2617
  /* Iterate through the original format string again,
2618
   * copying the non-conversion portions and the escaped
2619
   * converted arguments to the output string.
2620
   */
2621
0
  op1 = output1;
2622
0
  op2 = output2;
2623
0
  p = format;
2624
0
  while (TRUE)
2625
0
    {
2626
0
      const char *after;
2627
0
      const char *output_start;
2628
0
      const char *conv = find_conversion (p, &after);
2629
0
      char *escaped;
2630
2631
0
      if (!conv)        /* The end, after points to the trailing \0 */
2632
0
        {
2633
0
          g_string_append_len (result, p, after - p);
2634
0
          break;
2635
0
        }
2636
2637
0
      g_string_append_len (result, p, conv - p);
2638
0
      output_start = op1;
2639
0
      while (*op1 == *op2)
2640
0
        {
2641
0
          op1++;
2642
0
          op2++;
2643
0
        }
2644
2645
0
      escaped = g_markup_escape_text (output_start, op1 - output_start);
2646
0
      g_string_append (result, escaped);
2647
0
      g_free (escaped);
2648
2649
0
      p = after;
2650
0
      op1++;
2651
0
      op2++;
2652
0
    }
2653
2654
0
 cleanup:
2655
0
  g_string_free (format1, TRUE);
2656
0
  g_string_free (format2, TRUE);
2657
0
  g_free (output1);
2658
0
  g_free (output2);
2659
2660
0
  if (result)
2661
0
    return g_string_free (result, FALSE);
2662
0
  else
2663
0
    return NULL;
2664
0
}
2665
2666
#pragma GCC diagnostic pop
2667
2668
/**
2669
 * g_markup_printf_escaped:
2670
 * @format: printf() style format string
2671
 * @...: the arguments to insert in the format string
2672
 *
2673
 * Formats arguments according to @format, escaping
2674
 * all string and character arguments in the fashion
2675
 * of g_markup_escape_text(). This is useful when you
2676
 * want to insert literal strings into XML-style markup
2677
 * output, without having to worry that the strings
2678
 * might themselves contain markup.
2679
 *
2680
 * |[<!-- language="C" --> 
2681
 * const char *store = "Fortnum & Mason";
2682
 * const char *item = "Tea";
2683
 * char *output;
2684
 * 
2685
 * output = g_markup_printf_escaped ("<purchase>"
2686
 *                                   "<store>%s</store>"
2687
 *                                   "<item>%s</item>"
2688
 *                                   "</purchase>",
2689
 *                                   store, item);
2690
 * ]|
2691
 *
2692
 * Returns: newly allocated result from formatting
2693
 *    operation. Free with g_free().
2694
 *
2695
 * Since: 2.4
2696
 */
2697
gchar *
2698
g_markup_printf_escaped (const gchar *format, ...)
2699
0
{
2700
0
  char *result;
2701
0
  va_list args;
2702
2703
0
  va_start (args, format);
2704
0
  result = g_markup_vprintf_escaped (format, args);
2705
0
  va_end (args);
2706
2707
0
  return result;
2708
0
}
2709
2710
static gboolean
2711
g_markup_parse_boolean (const char  *string,
2712
                        gboolean    *value)
2713
0
{
2714
0
  char const * const falses[] = { "false", "f", "no", "n", "0" };
2715
0
  char const * const trues[] = { "true", "t", "yes", "y", "1" };
2716
0
  gsize i;
2717
2718
0
  for (i = 0; i < G_N_ELEMENTS (falses); i++)
2719
0
    {
2720
0
      if (g_ascii_strcasecmp (string, falses[i]) == 0)
2721
0
        {
2722
0
          if (value != NULL)
2723
0
            *value = FALSE;
2724
2725
0
          return TRUE;
2726
0
        }
2727
0
    }
2728
2729
0
  for (i = 0; i < G_N_ELEMENTS (trues); i++)
2730
0
    {
2731
0
      if (g_ascii_strcasecmp (string, trues[i]) == 0)
2732
0
        {
2733
0
          if (value != NULL)
2734
0
            *value = TRUE;
2735
2736
0
          return TRUE;
2737
0
        }
2738
0
    }
2739
2740
0
  return FALSE;
2741
0
}
2742
2743
/**
2744
 * GMarkupCollectType:
2745
 * @G_MARKUP_COLLECT_INVALID: used to terminate the list of attributes
2746
 *     to collect
2747
 * @G_MARKUP_COLLECT_STRING: collect the string pointer directly from
2748
 *     the attribute_values[] array. Expects a parameter of type (const
2749
 *     char **). If %G_MARKUP_COLLECT_OPTIONAL is specified and the
2750
 *     attribute isn't present then the pointer will be set to %NULL
2751
 * @G_MARKUP_COLLECT_STRDUP: as with %G_MARKUP_COLLECT_STRING, but
2752
 *     expects a parameter of type (char **) and g_strdup()s the
2753
 *     returned pointer. The pointer must be freed with g_free()
2754
 * @G_MARKUP_COLLECT_BOOLEAN: expects a parameter of type (`gboolean *`)
2755
 *     and parses the attribute value as a boolean. Sets %FALSE if the
2756
 *     attribute isn't present. Valid boolean values consist of
2757
 *     (case-insensitive) "false", "f", "no", "n", "0" and "true", "t",
2758
 *     "yes", "y", "1"
2759
 * @G_MARKUP_COLLECT_TRISTATE: as with %G_MARKUP_COLLECT_BOOLEAN, but
2760
 *     in the case of a missing attribute a value is set that compares
2761
 *     equal to neither %FALSE nor %TRUE. %G_MARKUP_COLLECT_OPTIONAL is
2762
 *     implied
2763
 * @G_MARKUP_COLLECT_OPTIONAL: can be bitwise ORed with the other fields.
2764
 *     If present, allows the attribute not to appear. A default value
2765
 *     is set depending on what value type is used
2766
 *
2767
 * A mixed enumerated type and flags field. You must specify one type
2768
 * (string, strdup, boolean, tristate).  Additionally, you may optionally
2769
 * bitwise OR the type with the flag %G_MARKUP_COLLECT_OPTIONAL.
2770
 *
2771
 * It is likely that this enum will be extended in the future to
2772
 * support other types.
2773
 */
2774
2775
/**
2776
 * g_markup_collect_attributes:
2777
 * @element_name: the current tag name
2778
 * @attribute_names: the attribute names
2779
 * @attribute_values: the attribute values
2780
 * @error: a pointer to a #GError or %NULL
2781
 * @first_type: the #GMarkupCollectType of the first attribute
2782
 * @first_attr: the name of the first attribute
2783
 * @...: a pointer to the storage location of the first attribute
2784
 *     (or %NULL), followed by more types, names and pointers, ending
2785
 *     with %G_MARKUP_COLLECT_INVALID
2786
 *
2787
 * Collects the attributes of the element from the data passed to the
2788
 * #GMarkupParser start_element function, dealing with common error
2789
 * conditions and supporting boolean values.
2790
 *
2791
 * This utility function is not required to write a parser but can save
2792
 * a lot of typing.
2793
 *
2794
 * The @element_name, @attribute_names, @attribute_values and @error
2795
 * parameters passed to the start_element callback should be passed
2796
 * unmodified to this function.
2797
 *
2798
 * Following these arguments is a list of "supported" attributes to collect.
2799
 * It is an error to specify multiple attributes with the same name. If any
2800
 * attribute not in the list appears in the @attribute_names array then an
2801
 * unknown attribute error will result.
2802
 *
2803
 * The #GMarkupCollectType field allows specifying the type of collection
2804
 * to perform and if a given attribute must appear or is optional.
2805
 *
2806
 * The attribute name is simply the name of the attribute to collect.
2807
 *
2808
 * The pointer should be of the appropriate type (see the descriptions
2809
 * under #GMarkupCollectType) and may be %NULL in case a particular
2810
 * attribute is to be allowed but ignored.
2811
 *
2812
 * This function deals with issuing errors for missing attributes
2813
 * (of type %G_MARKUP_ERROR_MISSING_ATTRIBUTE), unknown attributes
2814
 * (of type %G_MARKUP_ERROR_UNKNOWN_ATTRIBUTE) and duplicate
2815
 * attributes (of type %G_MARKUP_ERROR_INVALID_CONTENT) as well
2816
 * as parse errors for boolean-valued attributes (again of type
2817
 * %G_MARKUP_ERROR_INVALID_CONTENT). In all of these cases %FALSE
2818
 * will be returned and @error will be set as appropriate.
2819
 *
2820
 * Returns: %TRUE if successful
2821
 *
2822
 * Since: 2.16
2823
 **/
2824
gboolean
2825
g_markup_collect_attributes (const gchar         *element_name,
2826
                             const gchar        **attribute_names,
2827
                             const gchar        **attribute_values,
2828
                             GError             **error,
2829
                             GMarkupCollectType   first_type,
2830
                             const gchar         *first_attr,
2831
                             ...)
2832
0
{
2833
0
  GMarkupCollectType type;
2834
0
  const gchar *attr;
2835
0
  guint64 collected;
2836
0
  int written;
2837
0
  va_list ap;
2838
0
  int i;
2839
2840
0
  type = first_type;
2841
0
  attr = first_attr;
2842
0
  collected = 0;
2843
0
  written = 0;
2844
2845
0
  va_start (ap, first_attr);
2846
0
  while (type != G_MARKUP_COLLECT_INVALID)
2847
0
    {
2848
0
      gboolean mandatory;
2849
0
      const gchar *value;
2850
2851
0
      mandatory = !(type & G_MARKUP_COLLECT_OPTIONAL);
2852
0
      type &= (G_MARKUP_COLLECT_OPTIONAL - 1);
2853
2854
      /* tristate records a value != TRUE and != FALSE
2855
       * for the case where the attribute is missing
2856
       */
2857
0
      if (type == G_MARKUP_COLLECT_TRISTATE)
2858
0
        mandatory = FALSE;
2859
2860
0
      for (i = 0; attribute_names[i]; i++)
2861
0
        if (i >= 40 || !(collected & (G_GUINT64_CONSTANT(1) << i)))
2862
0
          if (!strcmp (attribute_names[i], attr))
2863
0
            break;
2864
2865
      /* ISO C99 only promises that the user can pass up to 127 arguments.
2866
       * Subtracting the first 4 arguments plus the final NULL and dividing
2867
       * by 3 arguments per collected attribute, we are left with a maximum
2868
       * number of supported attributes of (127 - 5) / 3 = 40.
2869
       *
2870
       * In reality, nobody is ever going to call us with anywhere close to
2871
       * 40 attributes to collect, so it is safe to assume that if i > 40
2872
       * then the user has given some invalid or repeated arguments.  These
2873
       * problems will be caught and reported at the end of the function.
2874
       *
2875
       * We know at this point that we have an error, but we don't know
2876
       * what error it is, so just continue...
2877
       */
2878
0
      if (i < 40)
2879
0
        collected |= (G_GUINT64_CONSTANT(1) << i);
2880
2881
0
      value = attribute_values[i];
2882
2883
0
      if (value == NULL && mandatory)
2884
0
        {
2885
0
          g_set_error (error, G_MARKUP_ERROR,
2886
0
                       G_MARKUP_ERROR_MISSING_ATTRIBUTE,
2887
0
                       "element '%s' requires attribute '%s'",
2888
0
                       element_name, attr);
2889
2890
0
          va_end (ap);
2891
0
          goto failure;
2892
0
        }
2893
2894
0
      switch (type)
2895
0
        {
2896
0
        case G_MARKUP_COLLECT_STRING:
2897
0
          {
2898
0
            const char **str_ptr;
2899
2900
0
            str_ptr = va_arg (ap, const char **);
2901
2902
0
            if (str_ptr != NULL)
2903
0
              *str_ptr = value;
2904
0
          }
2905
0
          break;
2906
2907
0
        case G_MARKUP_COLLECT_STRDUP:
2908
0
          {
2909
0
            char **str_ptr;
2910
2911
0
            str_ptr = va_arg (ap, char **);
2912
2913
0
            if (str_ptr != NULL)
2914
0
              *str_ptr = g_strdup (value);
2915
0
          }
2916
0
          break;
2917
2918
0
        case G_MARKUP_COLLECT_BOOLEAN:
2919
0
        case G_MARKUP_COLLECT_TRISTATE:
2920
0
          if (value == NULL)
2921
0
            {
2922
0
              gboolean *bool_ptr;
2923
2924
0
              bool_ptr = va_arg (ap, gboolean *);
2925
2926
0
              if (bool_ptr != NULL)
2927
0
                {
2928
0
                  if (type == G_MARKUP_COLLECT_TRISTATE)
2929
                    /* constructivists rejoice!
2930
                     * neither false nor true...
2931
                     */
2932
0
                    *bool_ptr = -1;
2933
2934
0
                  else /* G_MARKUP_COLLECT_BOOLEAN */
2935
0
                    *bool_ptr = FALSE;
2936
0
                }
2937
0
            }
2938
0
          else
2939
0
            {
2940
0
              if (!g_markup_parse_boolean (value, va_arg (ap, gboolean *)))
2941
0
                {
2942
0
                  g_set_error (error, G_MARKUP_ERROR,
2943
0
                               G_MARKUP_ERROR_INVALID_CONTENT,
2944
0
                               "element '%s', attribute '%s', value '%s' "
2945
0
                               "cannot be parsed as a boolean value",
2946
0
                               element_name, attr, value);
2947
2948
0
                  va_end (ap);
2949
0
                  goto failure;
2950
0
                }
2951
0
            }
2952
2953
0
          break;
2954
2955
0
        default:
2956
0
          g_assert_not_reached ();
2957
0
        }
2958
2959
0
      written++;
2960
0
      type = va_arg (ap, GMarkupCollectType);
2961
0
      if (type != G_MARKUP_COLLECT_INVALID)
2962
0
        attr = va_arg (ap, const char *);
2963
0
    }
2964
0
  va_end (ap);
2965
2966
  /* ensure we collected all the arguments */
2967
0
  for (i = 0; attribute_names[i]; i++)
2968
0
    if ((collected & (G_GUINT64_CONSTANT(1) << i)) == 0)
2969
0
      {
2970
        /* attribute not collected:  could be caused by two things.
2971
         *
2972
         * 1) it doesn't exist in our list of attributes
2973
         * 2) it existed but was matched by a duplicate attribute earlier
2974
         *
2975
         * find out.
2976
         */
2977
0
        int j;
2978
2979
0
        for (j = 0; j < i; j++)
2980
0
          if (strcmp (attribute_names[i], attribute_names[j]) == 0)
2981
            /* duplicate! */
2982
0
            break;
2983
2984
        /* j is now the first occurrence of attribute_names[i] */
2985
0
        if (i == j)
2986
0
          g_set_error (error, G_MARKUP_ERROR,
2987
0
                       G_MARKUP_ERROR_UNKNOWN_ATTRIBUTE,
2988
0
                       "attribute '%s' invalid for element '%s'",
2989
0
                       attribute_names[i], element_name);
2990
0
        else
2991
0
          g_set_error (error, G_MARKUP_ERROR,
2992
0
                       G_MARKUP_ERROR_INVALID_CONTENT,
2993
0
                       "attribute '%s' given multiple times for element '%s'",
2994
0
                       attribute_names[i], element_name);
2995
2996
0
        goto failure;
2997
0
      }
2998
2999
0
  return TRUE;
3000
3001
0
failure:
3002
  /* replay the above to free allocations */
3003
0
  type = first_type;
3004
3005
0
  va_start (ap, first_attr);
3006
0
  while (type != G_MARKUP_COLLECT_INVALID)
3007
0
    {
3008
0
      gpointer ptr;
3009
3010
0
      ptr = va_arg (ap, gpointer);
3011
3012
0
      if (ptr != NULL)
3013
0
        {
3014
0
          switch (type & (G_MARKUP_COLLECT_OPTIONAL - 1))
3015
0
            {
3016
0
            case G_MARKUP_COLLECT_STRDUP:
3017
0
              if (written)
3018
0
                g_free (*(char **) ptr);
3019
0
              *(char **) ptr = NULL;
3020
0
              break;
3021
3022
0
            case G_MARKUP_COLLECT_STRING:
3023
0
              *(char **) ptr = NULL;
3024
0
              break;
3025
3026
0
            case G_MARKUP_COLLECT_BOOLEAN:
3027
0
              *(gboolean *) ptr = FALSE;
3028
0
              break;
3029
3030
0
            case G_MARKUP_COLLECT_TRISTATE:
3031
0
              *(gboolean *) ptr = -1;
3032
0
              break;
3033
0
            }
3034
0
        }
3035
3036
0
      type = va_arg (ap, GMarkupCollectType);
3037
0
      if (type != G_MARKUP_COLLECT_INVALID)
3038
0
        {
3039
0
          attr = va_arg (ap, const char *);
3040
0
          (void) attr;
3041
0
        }
3042
0
    }
3043
0
  va_end (ap);
3044
3045
0
  return FALSE;
3046
0
}