Coverage Report

Created: 2026-02-26 06:18

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/vinyl-cache/bin/varnishd/cache/cache_esi_parse.c
Line
Count
Source
1
/*-
2
 * Copyright (c) 2011 Varnish Software AS
3
 * All rights reserved.
4
 *
5
 * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
6
 *
7
 * SPDX-License-Identifier: BSD-2-Clause
8
 *
9
 * Redistribution and use in source and binary forms, with or without
10
 * modification, are permitted provided that the following conditions
11
 * are met:
12
 * 1. Redistributions of source code must retain the above copyright
13
 *    notice, this list of conditions and the following disclaimer.
14
 * 2. Redistributions in binary form must reproduce the above copyright
15
 *    notice, this list of conditions and the following disclaimer in the
16
 *    documentation and/or other materials provided with the distribution.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
22
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28
 * SUCH DAMAGE.
29
 *
30
 * VEP Varnish Esi Parsing
31
 */
32
33
#include "config.h"
34
35
#include "cache_varnishd.h"
36
#include "cache_filter.h"
37
38
#include "cache_vgz.h"
39
#include "cache_esi.h"
40
#include "vct.h"
41
#include "vend.h"
42
#include "vgz.h"
43
44
/*
 * Debug tracing hook used throughout the parser.  It normally expands to
 * nothing; enable the printf() variant below to trace the state machine.
 */
//#define Debug(fmt, ...) printf(fmt, __VA_ARGS__)
#define Debug(fmt, ...) /**/

struct vep_state;

/*
 * Why a dostuff_f handler is being called: DO_ATTR when an attribute
 * value has been collected, DO_TAG when the closing '>' of the tag
 * has been reached.
 */
enum dowhat {
	DO_ATTR,
	DO_TAG
};

/* Per-element handler installed in vep_state.dostuff. */
typedef void dostuff_f(struct vep_state *, enum dowhat);

/*
 * One row of a prefix-match table.  A table is terminated by a row whose
 * match is NULL; that row supplies the state used when nothing matched.
 */
struct vep_match {
	const char		*match;		/* literal to look for */
	const char * const	*state;		/* state to enter on hit */
};

/* How a run of input bytes is to be represented in the output. */
enum vep_mark {
	VERBATIM = 0,
	SKIP
};
58
59
struct vep_state {
60
  unsigned    magic;
61
#define VEP_MAGIC   0x55cb9b82
62
  struct vsb    *vsb;
63
64
  const char    *url;
65
  struct vfp_ctx    *vc;
66
  int     dogzip;
67
  vep_callback_t    *cb;
68
  void      *cb_priv;
69
70
  /* Internal Counter for default call-back function */
71
  ssize_t     cb_x;
72
73
  /* parser state */
74
  const char    *state;
75
  unsigned    startup;
76
  unsigned    esi_found;
77
78
  unsigned    endtag;
79
  unsigned    emptytag;
80
  unsigned    canattr;
81
82
  unsigned    remove;
83
84
  ssize_t     o_wait;
85
  ssize_t     o_pending;
86
  ssize_t     o_total;
87
  uint32_t    crc;
88
  ssize_t     o_crc;
89
  uint32_t    crcp;
90
  ssize_t     o_last;
91
92
  const char    *hack_p;
93
  const char    *ver_p;
94
95
  const char    *until;
96
  const char    *until_p;
97
  const char    *until_s;
98
99
  int     in_esi_tag;
100
101
  const char    *esicmt;
102
  const char    *esicmt_p;
103
104
  struct vep_match  *attr;
105
  struct vsb    *attr_vsb;
106
  int     attr_delim;
107
108
  struct vep_match  *match;
109
  struct vep_match  *match_hit;
110
111
  char      tag[8];
112
  int     tag_i;
113
114
  dostuff_f   *dostuff;
115
116
  struct vsb    *include_src;
117
  unsigned    include_continue;
118
119
  unsigned    nm_skip;
120
  unsigned    nm_verbatim;
121
  unsigned    nm_pending;
122
  enum vep_mark   last_mark;
123
};
124
125
/*---------------------------------------------------------------------*/
126
127
/*
 * Parser states.  VEP_Parse() compares vep->state against these by
 * pointer, so each state needs its own object; the text is what the
 * Debug() trace prints.
 */

/* Start of object: BOM and XML detection */
static const char * const VEP_START = "[Start]";
static const char * const VEP_BOM = "[BOM]";
static const char * const VEP_TESTXML = "[TestXml]";
static const char * const VEP_NOTXML = "[NotXml]";

/* Hunting for the next tag */
static const char * const VEP_NEXTTAG = "[NxtTag]";
static const char * const VEP_NOTMYTAG = "[NotMyTag]";

/* Classification of the tag just opened */
static const char * const VEP_STARTTAG = "[StartTag]";
static const char * const VEP_COMMENTESI = "[CommentESI]";
static const char * const VEP_COMMENT = "[Comment]";
static const char * const VEP_CDATA = "[CDATA]";
static const char * const VEP_ESITAG = "[ESITag]";
static const char * const VEP_ESIENDTAG = "[/ESITag]";

/* The individual <esi:...> elements */
static const char * const VEP_ESIREMOVE = "[ESI:Remove]";
static const char * const VEP_ESIINCLUDE = "[ESI:Include]";
static const char * const VEP_ESICOMMENT = "[ESI:Comment]";
static const char * const VEP_ESIBOGON = "[ESI:Bogon]";

/* Inside an <esi:...> tag */
static const char * const VEP_INTAG = "[InTag]";
static const char * const VEP_TAGERROR = "[TagError]";

/* Attribute handling */
static const char * const VEP_ATTR = "[Attribute]";
static const char * const VEP_SKIPATTR = "[SkipAttribute]";
static const char * const VEP_ATTRDELIM = "[AttrDelim]";
static const char * const VEP_ATTRGETVAL = "[AttrGetValue]";
static const char * const VEP_ATTRVAL = "[AttrValue]";

/* Utility states */
static const char * const VEP_UNTIL = "[Until]";
static const char * const VEP_MATCHBUF = "[MatchBuf]";
static const char * const VEP_MATCH = "[Match]";
159
160
/*---------------------------------------------------------------------*/
161
162
static struct vep_match vep_match_starttag[] = {
163
  { "!--esi", &VEP_COMMENTESI },
164
  { "!---->", &VEP_NEXTTAG },
165
  { "!--",  &VEP_COMMENT },
166
  { "/esi:",  &VEP_ESIENDTAG },
167
  { "esi:", &VEP_ESITAG },
168
  { "![CDATA[", &VEP_CDATA },
169
  { NULL,   &VEP_NOTMYTAG }
170
};
171
172
/*---------------------------------------------------------------------*/
173
174
static struct vep_match vep_match_esi[] = {
175
  { "include",  &VEP_ESIINCLUDE },
176
  { "remove", &VEP_ESIREMOVE },
177
  { "comment",  &VEP_ESICOMMENT },
178
  { NULL,   &VEP_ESIBOGON }
179
};
180
181
/*---------------------------------------------------------------------*/
182
183
static struct vep_match vep_match_attr_include[] = {
184
  { "src=", &VEP_ATTRGETVAL },
185
  { "onerror=", &VEP_ATTRGETVAL },
186
  { NULL,   &VEP_SKIPATTR }
187
};
188
189
/*---------------------------------------------------------------------*/
190
191
static struct vep_match vep_match_bom[] = {
192
  { "\xeb\xbb\xbf", &VEP_START },
193
  { NULL,     &VEP_BOM }
194
};
195
196
/*--------------------------------------------------------------------
197
 * Report a parsing error
198
 */
199
200
static void
201
vep_error(const struct vep_state *vep, const char *p)
202
85.4k
{
203
85.4k
  VSC_C_main->esi_errors++;
204
85.4k
  VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror, "ERR: after %zd %s",
205
85.4k
       vep->o_last, p);
206
85.4k
}
207
208
/*--------------------------------------------------------------------
209
 * Report a parsing warning
210
 */
211
212
static void
213
vep_warn(const struct vep_state *vep, const char *p)
214
8.80k
{
215
8.80k
  VSC_C_main->esi_warnings++;
216
8.80k
  VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror, "WARN: after %zd %s",
217
8.80k
       vep->o_last, p);
218
8.80k
}
219
220
/*---------------------------------------------------------------------
221
 * return match or NULL if more input needed.
222
 */
223
224
static struct vep_match *
225
vep_match(const struct vep_state *vep, const char *b, const char *e)
226
239k
{
227
239k
  struct vep_match *vm;
228
239k
  const char *q, *r;
229
230
239k
  AN(vep->match);
231
571k
  for (vm = vep->match; vm->match != NULL; vm++) {
232
536k
    assert(strlen(vm->match) <= sizeof (vep->tag));
233
536k
    r = b;
234
1.76M
    for (q = vm->match; *q != '\0' && r < e; q++, r++)
235
1.55M
      if (*q != *r)
236
331k
        break;
237
536k
    if (*q == '\0')
238
204k
      break;
239
331k
    if (r == e)
240
53
      return (NULL);
241
331k
  }
242
239k
  return (vm);
243
239k
}
244
245
/*---------------------------------------------------------------------
246
 *
247
 */
248
249
static void
250
vep_emit_len(const struct vep_state *vep, ssize_t l, int m8, int m16, int m64)
251
298k
{
252
298k
  uint8_t buf[9];
253
254
298k
  assert(l > 0);
255
298k
  if (l < 256) {
256
297k
    buf[0] = (uint8_t)m8;
257
297k
    buf[1] = (uint8_t)l;
258
297k
    assert((ssize_t)buf[1] == l);
259
297k
    VSB_bcat(vep->vsb, buf, 2);
260
297k
  } else if (l < 65536) {
261
1.27k
    buf[0] = (uint8_t)m16;
262
1.27k
    vbe16enc(buf + 1, (uint16_t)l);
263
1.27k
    assert((ssize_t)vbe16dec(buf + 1) == l);
264
1.27k
    VSB_bcat(vep->vsb, buf, 3);
265
1.27k
  } else {
266
143
    buf[0] = (uint8_t)m64;
267
143
    vbe64enc(buf + 1, l);
268
143
    assert((ssize_t)vbe64dec(buf + 1) == l);
269
143
    VSB_bcat(vep->vsb, buf, 9);
270
143
  }
271
298k
}
272
273
static void
274
vep_emit_skip(const struct vep_state *vep, ssize_t l)
275
158k
{
276
277
158k
  vep_emit_len(vep, l, VEC_S1, VEC_S2, VEC_S8);
278
158k
}
279
280
static void
281
vep_emit_verbatim(const struct vep_state *vep, ssize_t l, ssize_t l_crc)
282
140k
{
283
140k
  uint8_t buf[4];
284
285
140k
  vep_emit_len(vep, l, VEC_V1, VEC_V2, VEC_V8);
286
140k
  if (vep->dogzip) {
287
0
    vep_emit_len(vep, l_crc, VEC_C1, VEC_C2, VEC_C8);
288
0
    vbe32enc(buf, vep->crc);
289
0
    VSB_bcat(vep->vsb, buf, sizeof buf);
290
0
  }
291
140k
}
292
293
static void
294
vep_emit_common(struct vep_state *vep, ssize_t l, enum vep_mark mark)
295
300k
{
296
297
300k
  assert(l >= 0);
298
300k
  if (l == 0)
299
1.60k
    return;
300
298k
  assert(mark == SKIP || mark == VERBATIM);
301
298k
  if (mark == SKIP)
302
158k
    vep_emit_skip(vep, l);
303
140k
  else
304
140k
    vep_emit_verbatim(vep, l, vep->o_crc);
305
306
298k
  vep->crc = crc32(0L, Z_NULL, 0);
307
298k
  vep->o_crc = 0;
308
298k
  vep->o_total += l;
309
298k
}
310
311
/*---------------------------------------------------------------------
312
 *
313
 */
314
315
static void
316
vep_mark_common(struct vep_state *vep, const char *p, enum vep_mark mark)
317
513k
{
318
513k
  ssize_t l, lcb;
319
320
513k
  assert(mark == SKIP || mark == VERBATIM);
321
322
  /* The NO-OP case, no data, no pending data & no change of mode */
323
513k
  if (vep->last_mark == mark && p == vep->ver_p && vep->o_pending == 0)
324
0
    return;
325
326
  /*
327
   * If we changed mode, emit whatever the opposite mode
328
   * assembled before the pending bytes.
329
   */
330
331
513k
  if (vep->last_mark != mark && (vep->o_wait > 0 || vep->startup)) {
332
298k
    lcb = vep->cb(vep->vc, vep->cb_priv, 0,
333
298k
        mark == VERBATIM ? VGZ_RESET : VGZ_ALIGN);
334
298k
    vep_emit_common(vep, lcb - vep->o_last, vep->last_mark);
335
298k
    vep->o_last = lcb;
336
298k
    vep->o_wait = 0;
337
298k
  }
338
339
  /* Transfer pending bytes CRC into active mode CRC */
340
513k
  if (vep->o_pending) {
341
211
    (void)vep->cb(vep->vc, vep->cb_priv, vep->o_pending,
342
211
         VGZ_NORMAL);
343
211
    if (vep->o_crc == 0) {
344
49
      vep->crc = vep->crcp;
345
49
      vep->o_crc = vep->o_pending;
346
162
    } else {
347
162
      vep->crc = crc32_combine(vep->crc,
348
162
          vep->crcp, vep->o_pending);
349
162
      vep->o_crc += vep->o_pending;
350
162
    }
351
211
    vep->crcp = crc32(0L, Z_NULL, 0);
352
211
    vep->o_wait += vep->o_pending;
353
211
    vep->o_pending = 0;
354
211
  }
355
356
  /* * Process this bit of input */
357
513k
  AN(vep->ver_p);
358
513k
  l = p - vep->ver_p;
359
513k
  assert(l >= 0);
360
513k
  vep->crc = crc32(vep->crc, (const void*)vep->ver_p, l);
361
513k
  vep->o_crc += l;
362
513k
  vep->ver_p = p;
363
364
513k
  vep->o_wait += l;
365
513k
  vep->last_mark = mark;
366
513k
  (void)vep->cb(vep->vc, vep->cb_priv, l, VGZ_NORMAL);
367
513k
}
368
369
static void
370
vep_mark_verbatim(struct vep_state *vep, const char *p)
371
191k
{
372
373
191k
  vep_mark_common(vep, p, VERBATIM);
374
191k
  vep->nm_verbatim++;
375
191k
}
376
377
static void
378
vep_mark_skip(struct vep_state *vep, const char *p)
379
320k
{
380
381
320k
  vep_mark_common(vep, p, SKIP);
382
320k
  vep->nm_skip++;
383
320k
}
384
385
static void
386
vep_mark_pending(struct vep_state *vep, const char *p)
387
211
{
388
211
  ssize_t l;
389
390
211
  AN(vep->ver_p);
391
211
  l = p - vep->ver_p;
392
211
  assert(l > 0);
393
211
  vep->crcp = crc32(vep->crcp, (const void *)vep->ver_p, l);
394
211
  vep->ver_p = p;
395
396
211
  vep->o_pending += l;
397
211
  vep->nm_pending++;
398
211
}
399
400
/*---------------------------------------------------------------------
401
 */
402
403
static void v_matchproto_()
404
vep_do_comment(struct vep_state *vep, enum dowhat what)
405
797
{
406
797
  Debug("DO_COMMENT(%d)\n", what);
407
797
  assert(what == DO_TAG);
408
797
  if (!vep->emptytag)
409
495
    vep_error(vep, "ESI 1.0 <esi:comment> needs final '/'");
410
797
}
411
412
/*---------------------------------------------------------------------
413
 */
414
415
static void v_matchproto_()
416
vep_do_remove(struct vep_state *vep, enum dowhat what)
417
4.39k
{
418
4.39k
  Debug("DO_REMOVE(%d, end %d empty %d remove %d)\n",
419
4.39k
      what, vep->endtag, vep->emptytag, vep->remove);
420
4.39k
  assert(what == DO_TAG);
421
4.39k
  if (vep->emptytag)
422
387
    vep_error(vep, "ESI 1.0 <esi:remove/> not legal");
423
4.00k
  else if (vep->remove && !vep->endtag)
424
2.89k
    vep_error(vep, "ESI 1.0 <esi:remove> already open");
425
1.11k
  else if (!vep->remove && vep->endtag)
426
283
    vep_error(vep, "ESI 1.0 <esi:remove> not open");
427
831
  else
428
831
    vep->remove = !vep->endtag;
429
4.39k
}
430
431
/*---------------------------------------------------------------------
432
 */
433
434
static void
435
include_attr_src(struct vep_state *vep)
436
10.3k
{
437
10.3k
  const char *p;
438
439
10.3k
  if (vep->include_src != NULL) {
440
1.17k
    vep_error(vep,
441
1.17k
        "ESI 1.0 <esi:include> "
442
1.17k
        "has multiple src= attributes");
443
1.17k
    vep->state = VEP_TAGERROR;
444
1.17k
    VSB_destroy(&vep->attr_vsb);
445
1.17k
    VSB_destroy(&vep->include_src);
446
1.17k
    return;
447
1.17k
  }
448
11.9M
  for (p = VSB_data(vep->attr_vsb); *p != '\0'; p++)
449
11.9M
    if (vct_islws(*p))
450
674
      break;
451
9.16k
  if (*p != '\0') {
452
674
    vep_error(vep,
453
674
        "ESI 1.0 <esi:include> "
454
674
        "has whitespace in src= attribute");
455
674
    vep->state = VEP_TAGERROR;
456
674
    VSB_destroy(&vep->attr_vsb);
457
674
    if (vep->include_src != NULL)
458
0
      VSB_destroy(&vep->include_src);
459
674
    return;
460
674
  }
461
8.49k
  vep->include_src = vep->attr_vsb;
462
8.49k
  vep->attr_vsb = NULL;
463
8.49k
}
464
465
static void
466
include_attr_onerror(struct vep_state *vep)
467
0
{
468
469
0
  vep->include_continue = !strcmp("continue", VSB_data(vep->attr_vsb));
470
0
  VSB_destroy(&vep->attr_vsb);
471
0
}
472
473
static void v_matchproto_()
474
vep_do_include(struct vep_state *vep, enum dowhat what)
475
18.4k
{
476
18.4k
  const char *p, *q, *h;
477
18.4k
  ssize_t l;
478
18.4k
  char incl;
479
480
18.4k
  Debug("DO_INCLUDE(%d)\n", what);
481
18.4k
  if (what == DO_ATTR) {
482
10.3k
    Debug("ATTR (%s) (%s)\n", vep->match_hit->match,
483
10.3k
      VSB_data(vep->attr_vsb));
484
10.3k
    if (!strcmp("src=", vep->match_hit->match)) {
485
10.3k
      include_attr_src(vep);
486
10.3k
      return;
487
10.3k
    }
488
0
    if (!strcmp("onerror=", vep->match_hit->match)) {
489
0
      include_attr_onerror(vep);
490
0
      return;
491
0
    }
492
0
    WRONG("Unhandled <esi:include> attribute");
493
0
  }
494
8.07k
  assert(what == DO_TAG);
495
8.07k
  if (!vep->emptytag)
496
7.62k
    vep_warn(vep, "ESI 1.0 <esi:include> lacks final '/'");
497
8.07k
  if (vep->include_src == NULL) {
498
797
    vep_error(vep, "ESI 1.0 <esi:include> lacks src attr");
499
797
    return;
500
797
  }
501
502
  /*
503
   * Strictly speaking, we ought to spit out any piled up skip before
504
   * emitting the VEC for the include, but objectively that makes no
505
   * difference and robs us of a chance to collapse another skip into
506
   * this on so we don't do that.
507
   * However, we cannot tolerate any verbatim stuff piling up.
508
   * The mark_skip() before calling dostuff should have taken
509
   * care of that.  Make sure.
510
   */
511
7.27k
  assert(vep->o_wait == 0 || vep->last_mark == SKIP);
512
  /* XXX: what if it contains NUL bytes ?? */
513
7.27k
  p = VSB_data(vep->include_src);
514
7.27k
  l = VSB_len(vep->include_src);
515
7.27k
  h = 0;
516
517
7.27k
  incl = vep->include_continue ? VEC_IC : VEC_IA;
518
519
7.27k
  if (l > 7 && !memcmp(p, "http://", 7)) {
520
3.25k
    h = p + 7;
521
3.25k
    p = strchr(h, '/');
522
3.25k
    if (p == NULL) {
523
542
      vep_error(vep,
524
542
          "ESI 1.0 <esi:include> invalid src= URL");
525
542
      vep->state = VEP_TAGERROR;
526
542
      AZ(vep->attr_vsb);
527
542
      VSB_destroy(&vep->include_src);
528
542
      return;
529
542
    }
530
2.70k
    Debug("HOST <%.*s> PATH <%s>\n", (int)(p-h),h, p);
531
2.70k
    VSB_printf(vep->vsb, "%c", incl);
532
2.70k
    VSB_printf(vep->vsb, "Host: %.*s%c", (int)(p-h), h, 0);
533
4.02k
  } else if (l > 8 && !memcmp(p, "https://", 8)) {
534
1.17k
    if (!FEATURE(FEATURE_ESI_IGNORE_HTTPS)) {
535
305
      vep_warn(vep,
536
305
          "ESI 1.0 <esi:include> with https:// ignored");
537
305
      vep->state = VEP_TAGERROR;
538
305
      AZ(vep->attr_vsb);
539
305
      VSB_destroy(&vep->include_src);
540
305
      return;
541
305
    }
542
870
    vep_warn(vep,
543
870
        "ESI 1.0 <esi:include> https:// treated as http://");
544
870
    h = p + 8;
545
870
    p = strchr(h, '/');
546
870
    if (p == NULL) {
547
576
      vep_error(vep,
548
576
          "ESI 1.0 <esi:include> invalid src= URL");
549
576
      vep->state = VEP_TAGERROR;
550
576
      AZ(vep->attr_vsb);
551
576
      VSB_destroy(&vep->include_src);
552
576
      return;
553
576
    }
554
294
    VSB_printf(vep->vsb, "%c", incl);
555
294
    VSB_printf(vep->vsb, "Host: %.*s%c", (int)(p-h), h, 0);
556
2.85k
  } else if (*p == '/') {
557
588
    VSB_printf(vep->vsb, "%c", incl);
558
588
    VSB_printf(vep->vsb, "%c", 0);
559
2.26k
  } else {
560
2.26k
    VSB_printf(vep->vsb, "%c", incl);
561
2.26k
    VSB_printf(vep->vsb, "%c", 0);
562
    /* Look for the last / before a '?' */
563
2.26k
    h = NULL;
564
4.52k
    for (q = vep->url; *q && *q != '?'; q++)
565
2.26k
      if (*q == '/')
566
2.26k
        h = q;
567
2.26k
    if (h == NULL)
568
0
      h = q + 1;
569
570
2.26k
    Debug("INCL:: [%.*s]/[%s]\n",
571
2.26k
        (int)(h - vep->url), vep->url, p);
572
2.26k
    VSB_printf(vep->vsb, "%.*s/", (int)(h - vep->url), vep->url);
573
2.26k
  }
574
5.85k
  l -= (p - VSB_data(vep->include_src));
575
6.83M
  for (q = p; *q != '\0'; ) {
576
6.83M
    if (*q == '&') {
577
14.5k
#define R(w,f,r)              \
578
60.8k
      if (q + w <= p + l && !memcmp(q, f, w)) { \
579
6.05k
        VSB_printf(vep->vsb, "%c", r);  \
580
6.05k
        q += w;       \
581
6.05k
        continue;     \
582
6.05k
      }
583
14.5k
      R(6, "&apos;", '\'');
584
13.5k
      R(6, "&quot;", '"');
585
12.0k
      R(4, "&lt;", '<');
586
11.3k
      R(4, "&gt;", '>');
587
9.34k
      R(5, "&amp;", '&');
588
8.50k
    }
589
6.82M
    VSB_printf(vep->vsb, "%c", *q++);
590
6.82M
  }
591
5.85k
#undef R
592
5.85k
  VSB_printf(vep->vsb, "%c", 0);
593
5.85k
  VSB_destroy(&vep->include_src);
594
5.85k
  vep->include_continue = 0;
595
5.85k
}
596
597
/*---------------------------------------------------------------------
598
 * Lex/Parse object for ESI instructions
599
 *
600
 * This function is called with the input object piecemeal so do not
601
 * assume that we have more than one char available at a time, but
602
 * optimize for getting huge chunks.
603
 *
604
 * NB: At the bottom of this source-file, there is a dot-diagram matching
605
 * NB: the state-machine.  Please maintain it along with the code.
606
 */
607
608
void
609
VEP_Parse(struct vep_state *vep, const char *p, size_t l)
610
1.60k
{
611
1.60k
  const char *e;
612
1.60k
  struct vep_match *vm;
613
1.60k
  int i;
614
615
1.60k
  CHECK_OBJ_NOTNULL(vep, VEP_MAGIC);
616
1.60k
  assert(l > 0);
617
618
1.60k
  if (vep->startup) {
619
    /*
620
     * We must force the GZIP header out as a SKIP string,
621
     * otherwise an object starting with <esi:include would
622
     * have its GZIP header appear after the included object
623
     * (e000026.vtc)
624
     */
625
1.60k
    vep->ver_p = "";
626
1.60k
    vep->last_mark = SKIP;
627
1.60k
    vep_mark_common(vep, vep->ver_p, VERBATIM);
628
1.60k
    vep->startup = 0;
629
1.60k
    AZ(vep->hack_p);
630
1.60k
    vep->hack_p = p;
631
1.60k
  }
632
633
1.60k
  vep->ver_p = p;
634
635
1.60k
  e = p + l;
636
637
991k
  while (p < e) {
638
989k
    AN(vep->state);
639
989k
    Debug("EP %s %d (%.*s) [%.*s]\n",
640
989k
        vep->state,
641
989k
        vep->remove,
642
989k
        vep->tag_i, vep->tag,
643
989k
        (e - p) > 10 ? 10 : (int)(e-p), p);
644
989k
    assert(p >= vep->ver_p);
645
646
    /******************************************************
647
     * SECTION A
648
     */
649
650
989k
    if (vep->state == VEP_START) {
651
3.35k
      if (FEATURE(FEATURE_ESI_REMOVE_BOM) &&
652
3.25k
          *p == (char)0xeb) {
653
1.78k
        vep->match = vep_match_bom;
654
1.78k
        vep->state = VEP_MATCH;
655
1.78k
      } else
656
1.57k
        vep->state = VEP_BOM;
657
986k
    } else if (vep->state == VEP_BOM) {
658
1.58k
      vep_mark_skip(vep, p);
659
1.58k
      if (FEATURE(FEATURE_ESI_DISABLE_XML_CHECK))
660
1.49k
        vep->state = VEP_NEXTTAG;
661
86
      else
662
86
        vep->state = VEP_TESTXML;
663
984k
    } else if (vep->state == VEP_TESTXML) {
664
      /*
665
       * If the first non-whitespace char is different
666
       * from '<' we assume this is not XML.
667
       */
668
434
      while (p < e && vct_islws(*p))
669
348
        p++;
670
86
      vep_mark_verbatim(vep, p);
671
86
      if (p < e && *p == '<') {
672
14
        p++;
673
14
        vep->state = VEP_STARTTAG;
674
72
      } else if (p < e && *p == (char)0xeb) {
675
2
        VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror,
676
2
            "WARN: No ESI processing, "
677
2
            "first char not '<' but BOM."
678
2
            " (See feature esi_remove_bom)"
679
2
        );
680
2
        vep->state = VEP_NOTXML;
681
70
      } else if (p < e) {
682
61
        VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror,
683
61
            "WARN: No ESI processing, "
684
61
            "first char not '<'."
685
61
            " (See feature esi_disable_xml_check)"
686
61
        );
687
61
        vep->state = VEP_NOTXML;
688
61
      }
689
984k
    } else if (vep->state == VEP_NOTXML) {
690
      /*
691
       * This is not recognized as XML, just skip thru
692
       * vfp_esi_end() will handle the rest
693
       */
694
63
      p = e;
695
63
      vep_mark_verbatim(vep, p);
696
697
    /******************************************************
698
     * SECTION B
699
     */
700
701
984k
    } else if (vep->state == VEP_NOTMYTAG) {
702
26.0k
      if (FEATURE(FEATURE_ESI_IGNORE_OTHER_ELEMENTS)) {
703
21.1k
        p++;
704
21.1k
        vep->state = VEP_NEXTTAG;
705
21.1k
      } else {
706
4.94k
        vep->tag_i = 0;
707
1.24M
        while (p < e) {
708
1.24M
          if (*p++ == '>') {
709
4.93k
            vep->state = VEP_NEXTTAG;
710
4.93k
            break;
711
4.93k
          }
712
1.24M
        }
713
4.94k
      }
714
26.0k
      if (p == e && !vep->remove)
715
45
        vep_mark_verbatim(vep, p);
716
958k
    } else if (vep->state == VEP_NEXTTAG) {
717
      /*
718
       * Hunt for start of next tag and keep an eye
719
       * out for end of EsiCmt if armed.
720
       */
721
193k
      vep->emptytag = 0;
722
193k
      vep->attr = NULL;
723
193k
      vep->dostuff = NULL;
724
14.9M
      while (p < e && *p != '<') {
725
14.7M
        if (vep->esicmt_p == NULL) {
726
13.9M
          p++;
727
13.9M
          continue;
728
13.9M
        }
729
829k
        if (*p != *vep->esicmt_p) {
730
436k
          p++;
731
436k
          vep->esicmt_p = vep->esicmt;
732
436k
          continue;
733
436k
        }
734
392k
        if (!vep->remove && vep->esicmt_p == vep->esicmt)
735
69.7k
          vep_mark_verbatim(vep, p);
736
392k
        p++;
737
392k
        if (*++vep->esicmt_p == '\0') {
738
128k
          vep->esi_found = 1;
739
128k
          vep->esicmt = NULL;
740
128k
          vep->esicmt_p = NULL;
741
          /*
742
           * The end of the esicmt
743
           * should not be emitted.
744
           * But the stuff before should
745
           */
746
128k
          vep_mark_skip(vep, p);
747
128k
        }
748
392k
      }
749
193k
      if (p < e) {
750
192k
        if (!vep->remove)
751
121k
          vep_mark_verbatim(vep, p);
752
192k
        assert(*p == '<');
753
192k
        p++;
754
192k
        vep->state = VEP_STARTTAG;
755
192k
      } else if (vep->esicmt_p == vep->esicmt && !vep->remove)
756
246
        vep_mark_verbatim(vep, p);
757
758
    /******************************************************
759
     * SECTION C
760
     */
761
762
765k
    } else if (vep->state == VEP_STARTTAG) {
763
      /* Start of tag, set up match table */
764
192k
      vep->endtag = 0;
765
192k
      vep->match = vep_match_starttag;
766
192k
      vep->state = VEP_MATCH;
767
573k
    } else if (vep->state == VEP_COMMENT) {
768
2.04k
      vep->esicmt_p = vep->esicmt = NULL;
769
2.04k
      vep->until_p = vep->until = "-->";
770
2.04k
      vep->until_s = VEP_NEXTTAG;
771
2.04k
      vep->state = VEP_UNTIL;
772
571k
    } else if (vep->state == VEP_COMMENTESI) {
773
130k
      if (vep->remove)
774
62.3k
        vep_error(vep,
775
62.3k
            "ESI 1.0 Nested <!--esi"
776
62.3k
            " element in <esi:remove>");
777
130k
      vep->esicmt_p = vep->esicmt = "-->";
778
130k
      vep->state = VEP_NEXTTAG;
779
130k
      vep_mark_skip(vep, p);
780
441k
    } else if (vep->state == VEP_CDATA) {
781
      /*
782
       * Easy: just look for the end of CDATA
783
       */
784
4.10k
      vep->until_p = vep->until = "]]>";
785
4.10k
      vep->until_s = VEP_NEXTTAG;
786
4.10k
      vep->state = VEP_UNTIL;
787
436k
    } else if (vep->state == VEP_ESIENDTAG) {
788
2.07k
      vep->endtag = 1;
789
2.07k
      vep->state = VEP_ESITAG;
790
434k
    } else if (vep->state == VEP_ESITAG) {
791
29.9k
      vep->in_esi_tag = 1;
792
29.9k
      vep->esi_found = 1;
793
29.9k
      vep_mark_skip(vep, p);
794
29.9k
      vep->match = vep_match_esi;
795
29.9k
      vep->state = VEP_MATCH;
796
404k
    } else if (vep->state == VEP_ESIINCLUDE) {
797
16.8k
      if (vep->remove) {
798
1.34k
        vep_error(vep,
799
1.34k
            "ESI 1.0 <esi:include> element"
800
1.34k
            " nested in <esi:remove>");
801
1.34k
        vep->state = VEP_TAGERROR;
802
15.4k
      } else if (vep->endtag) {
803
303
        vep_error(vep,
804
303
            "ESI 1.0 </esi:include> illegal end-tag");
805
303
        vep->state = VEP_TAGERROR;
806
15.1k
      } else {
807
15.1k
        vep->dostuff = vep_do_include;
808
15.1k
        vep->state = VEP_INTAG;
809
15.1k
        vep->attr = vep_match_attr_include;
810
15.1k
      }
811
388k
    } else if (vep->state == VEP_ESIREMOVE) {
812
6.25k
      vep->dostuff = vep_do_remove;
813
6.25k
      vep->state = VEP_INTAG;
814
381k
    } else if (vep->state == VEP_ESICOMMENT) {
815
1.59k
      if (vep->remove) {
816
369
        vep_error(vep,
817
369
            "ESI 1.0 <esi:comment> element"
818
369
            " nested in <esi:remove>");
819
369
        vep->state = VEP_TAGERROR;
820
1.22k
      } else if (vep->endtag) {
821
341
        vep_error(vep,
822
341
            "ESI 1.0 </esi:comment> illegal end-tag");
823
341
        vep->state = VEP_TAGERROR;
824
881
      } else {
825
881
        vep->dostuff = vep_do_comment;
826
881
        vep->state = VEP_INTAG;
827
881
      }
828
380k
    } else if (vep->state == VEP_ESIBOGON) {
829
5.32k
      vep_error(vep,
830
5.32k
          "ESI 1.0 <esi:bogus> element");
831
5.32k
      vep->state = VEP_TAGERROR;
832
833
    /******************************************************
834
     * SECTION D
835
     */
836
837
374k
    } else if (vep->state == VEP_INTAG) {
838
39.0k
      vep->tag_i = 0;
839
46.0k
      while (p < e && vct_islws(*p) && !vep->emptytag) {
840
7.06k
        p++;
841
7.06k
        vep->canattr = 1;
842
7.06k
      }
843
39.0k
      if (p < e && *p == '/' && !vep->emptytag) {
844
2.84k
        p++;
845
2.84k
        vep->emptytag = 1;
846
2.84k
        vep->canattr = 0;
847
2.84k
      }
848
39.0k
      if (p < e && *p == '>') {
849
13.2k
        p++;
850
13.2k
        AN(vep->dostuff);
851
13.2k
        vep_mark_skip(vep, p);
852
13.2k
        vep->dostuff(vep, DO_TAG);
853
13.2k
        vep->in_esi_tag = 0;
854
13.2k
        vep->state = VEP_NEXTTAG;
855
25.7k
      } else if (p < e && vep->emptytag) {
856
1.69k
        vep_error(vep,
857
1.69k
            "XML 1.0 '>' does not follow '/' in tag");
858
1.69k
        vep->state = VEP_TAGERROR;
859
24.0k
      } else if (p < e && vep->canattr &&
860
21.5k
          vct_isxmlnamestart(*p)) {
861
20.8k
        vep->state = VEP_ATTR;
862
20.8k
      } else if (p < e) {
863
3.19k
        vep_error(vep,
864
3.19k
            "XML 1.0 Illegal attribute start char");
865
3.19k
        vep->state = VEP_TAGERROR;
866
3.19k
      }
867
335k
    } else if (vep->state == VEP_TAGERROR) {
868
4.86M
      while (p < e && *p != '>')
869
4.84M
        p++;
870
16.5k
      if (p < e) {
871
16.3k
        p++;
872
16.3k
        vep_mark_skip(vep, p);
873
16.3k
        vep->in_esi_tag = 0;
874
16.3k
        vep->state = VEP_NEXTTAG;
875
16.3k
        if (vep->attr_vsb)
876
240
          VSB_destroy(&vep->attr_vsb);
877
16.3k
      }
878
879
    /******************************************************
880
     * SECTION E
881
     */
882
883
319k
    } else if (vep->state == VEP_ATTR) {
884
20.8k
      AZ(vep->attr_delim);
885
20.8k
      if (vep->attr == NULL) {
886
5.89k
        p++;
887
5.89k
        AZ(vep->attr_vsb);
888
5.89k
        vep->state = VEP_SKIPATTR;
889
14.9k
      } else {
890
14.9k
        vep->match = vep->attr;
891
14.9k
        vep->state = VEP_MATCH;
892
14.9k
      }
893
298k
    } else if (vep->state == VEP_SKIPATTR) {
894
30.6k
      while (p < e && vct_isxmlname(*p))
895
21.2k
        p++;
896
9.39k
      if (p < e && *p == '=') {
897
4.92k
        p++;
898
4.92k
        vep->state = VEP_ATTRDELIM;
899
4.92k
      } else if (p < e && *p == '>') {
900
674
        vep->state = VEP_INTAG;
901
3.79k
      } else if (p < e && *p == '/') {
902
1.71k
        vep->state = VEP_INTAG;
903
2.07k
      } else if (p < e && vct_issp(*p)) {
904
1.52k
        vep->state = VEP_INTAG;
905
1.52k
      } else if (p < e) {
906
536
        vep_error(vep,
907
536
            "XML 1.0 Illegal attr char");
908
536
        vep->state = VEP_TAGERROR;
909
536
      }
910
289k
    } else if (vep->state == VEP_ATTRGETVAL) {
911
11.4k
      AZ(vep->attr_vsb);
912
11.4k
      vep->attr_vsb = VSB_new_auto();
913
11.4k
      vep->state = VEP_ATTRDELIM;
914
277k
    } else if (vep->state == VEP_ATTRDELIM) {
915
16.3k
      AZ(vep->attr_delim);
916
16.3k
      if (*p == '"' || *p == '\'') {
917
757
        vep->attr_delim = *p++;
918
757
        vep->state = VEP_ATTRVAL;
919
15.5k
      } else if (!vct_issp(*p)) {
920
15.1k
        vep->attr_delim = ' ';
921
15.1k
        vep->state = VEP_ATTRVAL;
922
15.1k
      } else {
923
436
        vep_error(vep,
924
436
            "XML 1.0 Illegal attribute delimiter");
925
436
        vep->state = VEP_TAGERROR;
926
436
      }
927
928
261k
    } else if (vep->state == VEP_ATTRVAL) {
929
15.0M
      while (p < e && *p != '>' && *p != vep->attr_delim &&
930
14.9M
         (vep->attr_delim != ' ' || !vct_issp(*p))) {
931
14.9M
        if (vep->attr_vsb != NULL)
932
14.8M
          VSB_putc(vep->attr_vsb, *p);
933
14.9M
        p++;
934
14.9M
      }
935
15.8k
      if (p < e && *p == '>') {
936
1.13k
        vep_error(vep,
937
1.13k
            "XML 1.0 Missing end attribute delimiter");
938
1.13k
        vep->state = VEP_TAGERROR;
939
1.13k
        vep->attr_delim = 0;
940
1.13k
        if (vep->attr_vsb != NULL) {
941
787
          AZ(VSB_finish(vep->attr_vsb));
942
787
          VSB_destroy(&vep->attr_vsb);
943
787
        }
944
14.7k
      } else if (p < e) {
945
14.6k
        vep->attr_delim = 0;
946
14.6k
        p++;
947
14.6k
        vep->state = VEP_INTAG;
948
14.6k
        if (vep->attr_vsb != NULL) {
949
10.3k
          AZ(VSB_finish(vep->attr_vsb));
950
10.3k
          AN(vep->dostuff);
951
10.3k
          vep->dostuff(vep, DO_ATTR);
952
10.3k
          vep->attr_vsb = NULL;
953
10.3k
        }
954
14.6k
      }
955
956
    /******************************************************
957
     * Utility Section
958
     */
959
960
245k
    } else if (vep->state == VEP_MATCH) {
961
      /*
962
       * Match against a table
963
       */
964
239k
      vm = vep_match(vep, p, e);
965
239k
      vep->match_hit = vm;
966
239k
      if (vm != NULL) {
967
239k
        if (vm->match != NULL)
968
204k
          p += strlen(vm->match);
969
239k
        vep->state = *vm->state;
970
239k
        vep->match = NULL;
971
239k
        vep->tag_i = 0;
972
239k
      } else {
973
53
        assert(p + sizeof(vep->tag) >= e);
974
53
        memcpy(vep->tag, p, e - p);
975
53
        vep->tag_i = e - p;
976
53
        vep->state = VEP_MATCHBUF;
977
53
        p = e;
978
53
      }
979
239k
    } else if (vep->state == VEP_MATCHBUF) {
980
      /*
981
       * Match against a table while split over input
982
       * sections.
983
       */
984
0
      AN(vep->match);
985
0
      i = sizeof(vep->tag) - vep->tag_i;
986
0
      if (i > e - p)
987
0
        i = e - p;
988
0
      memcpy(vep->tag + vep->tag_i, p, i);
989
0
      vm = vep_match(vep, vep->tag,
990
0
          vep->tag + vep->tag_i + i);
991
0
      Debug("MB (%.*s) tag_i %d i %d = vm %p match %s\n",
992
0
          vep->tag_i + i, vep->tag,
993
0
          vep->tag_i,
994
0
          i,
995
0
          vm,
996
0
          vm ? vm->match : "(nil)");
997
998
0
      if (vm == NULL) {
999
0
        vep->tag_i += i;
1000
0
        p += i;
1001
0
        assert(p == e);
1002
0
      } else {
1003
0
        vep->match_hit = vm;
1004
0
        vep->state = *vm->state;
1005
0
        if (vm->match != NULL) {
1006
0
          i = strlen(vm->match);
1007
0
          if (i > vep->tag_i)
1008
0
            p += i - vep->tag_i;
1009
0
        }
1010
0
        vep->match = NULL;
1011
0
        vep->tag_i = 0;
1012
0
      }
1013
6.15k
    } else if (vep->state == VEP_UNTIL) {
1014
      /*
1015
       * Skip until we see magic string
1016
       */
1017
10.9M
      while (p < e) {
1018
10.9M
        if (*p++ != *vep->until_p++) {
1019
10.9M
          vep->until_p = vep->until;
1020
10.9M
        } else if (*vep->until_p == '\0') {
1021
6.09k
          vep->state = vep->until_s;
1022
6.09k
          break;
1023
6.09k
        }
1024
10.9M
      }
1025
6.15k
      if (p == e && !vep->remove)
1026
62
        vep_mark_verbatim(vep, p);
1027
6.15k
    } else {
1028
0
      Debug("*** Unknown state %s\n", vep->state);
1029
0
      WRONG("WRONG ESI PARSER STATE");
1030
0
    }
1031
989k
  }
1032
  /*
1033
   * We must always mark up the storage we got, try to do so
1034
   * in the most efficient way, in particular with respect to
1035
   * minimizing and limiting use of pending.
1036
   */
1037
1.60k
  if (p == vep->ver_p)
1038
1.00k
    ;
1039
601
  else if (vep->in_esi_tag)
1040
357
    vep_mark_skip(vep, p);
1041
244
  else if (vep->remove)
1042
33
    vep_mark_skip(vep, p);
1043
211
  else
1044
211
    vep_mark_pending(vep, p);
1045
1.60k
}
1046
1047
/*---------------------------------------------------------------------
1048
 */
1049
1050
static ssize_t v_matchproto_(vep_callback_t)
1051
vep_default_cb(struct vfp_ctx *vc, void *priv, ssize_t l, enum vgz_flag flg)
1052
815k
{
1053
815k
  ssize_t *s;
1054
1055
815k
  CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC);
1056
815k
  AN(priv);
1057
815k
  s = priv;
1058
815k
  *s += l;
1059
815k
  (void)flg;
1060
815k
  return (*s);
1061
815k
}
1062
1063
/*---------------------------------------------------------------------
1064
 */
1065
1066
struct vep_state *
1067
VEP_Init(struct vfp_ctx *vc, const struct http *req, vep_callback_t *cb,
1068
    void *cb_priv)
1069
1.60k
{
1070
1.60k
  struct vep_state *vep;
1071
1072
1.60k
  CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC);
1073
1.60k
  CHECK_OBJ_NOTNULL(req, HTTP_MAGIC);
1074
1.60k
  vep = WS_Alloc(vc->resp->ws, sizeof *vep);
1075
1.60k
  if (vep == NULL) {
1076
0
    VSLb(vc->wrk->vsl, SLT_VCL_Error,
1077
0
         "VEP_Init() workspace overflow");
1078
0
    return (NULL);
1079
0
  }
1080
1081
1.60k
  INIT_OBJ(vep, VEP_MAGIC);
1082
1.60k
  vep->url = req->hd[HTTP_HDR_URL].b;
1083
1.60k
  vep->vc = vc;
1084
1.60k
  vep->vsb = VSB_new_auto();
1085
1.60k
  AN(vep->vsb);
1086
1087
1.60k
  if (cb != NULL) {
1088
0
    vep->dogzip = 1;
1089
    /* XXX */
1090
0
    VSB_printf(vep->vsb, "%c", VEC_GZ);
1091
0
    vep->cb = cb;
1092
0
    vep->cb_priv = cb_priv;
1093
1.60k
  } else {
1094
1.60k
    vep->cb = vep_default_cb;
1095
1.60k
    vep->cb_priv = &vep->cb_x;
1096
1.60k
  }
1097
1098
1.60k
  vep->state = VEP_START;
1099
1.60k
  vep->crc = crc32(0L, Z_NULL, 0);
1100
1.60k
  vep->crcp = crc32(0L, Z_NULL, 0);
1101
1102
1.60k
  vep->startup = 1;
1103
1.60k
  return (vep);
1104
1.60k
}
1105
1106
/*---------------------------------------------------------------------
1107
 */
1108
1109
struct vsb *
1110
VEP_Finish(struct vep_state *vep)
1111
1.60k
{
1112
1.60k
  ssize_t l, lcb;
1113
1114
1.60k
  CHECK_OBJ_NOTNULL(vep, VEP_MAGIC);
1115
1116
1.60k
  if (vep->include_src)
1117
41
    VSB_destroy(&vep->include_src);
1118
1.60k
  if (vep->attr_vsb)
1119
35
    VSB_destroy(&vep->attr_vsb);
1120
1121
1.60k
  if (vep->state != VEP_START &&
1122
1.59k
      vep->state != VEP_BOM &&
1123
1.59k
      vep->state != VEP_TESTXML &&
1124
1.58k
      vep->state != VEP_NOTXML &&
1125
1.52k
      vep->state != VEP_NEXTTAG) {
1126
571
    vep_error(vep, "VEP ended inside a tag");
1127
571
  }
1128
1129
1.60k
  if (vep->o_pending)
1130
211
    vep_mark_common(vep, vep->ver_p, vep->last_mark);
1131
1.60k
  if (vep->o_wait > 0) {
1132
1.57k
    lcb = vep->cb(vep->vc, vep->cb_priv, 0, VGZ_ALIGN);
1133
1.57k
    vep_emit_common(vep, lcb - vep->o_last, vep->last_mark);
1134
1.57k
  }
1135
  // NB: We don't account for PAD+SUM+LEN in gzipped objects
1136
1.60k
  (void)vep->cb(vep->vc, vep->cb_priv, 0, VGZ_FINISH);
1137
1138
1.60k
  AZ(VSB_finish(vep->vsb));
1139
1.60k
  l = VSB_len(vep->vsb);
1140
1.60k
  if (vep->esi_found && l > 0)
1141
1.12k
    return (vep->vsb);
1142
480
  VSB_destroy(&vep->vsb);
1143
  return (NULL);
1144
1.60k
}
1145
1146
#if 0
1147
1148
digraph xml {
1149
  rankdir="LR"
1150
  size="7,10"
1151
#################################################################
1152
# SECTION A
1153
#
1154
1155
START   [shape=ellipse]
1156
TESTXML   [shape=ellipse]
1157
NOTXML    [shape=ellipse]
1158
NEXTTAGa  [shape=hexagon, label="NEXTTAG"]
1159
STARTTAGa [shape=hexagon, label="STARTTAG"]
1160
START   -> TESTXML
1161
START   -> NEXTTAGa [style=dotted, label="syntax:1"]
1162
TESTXML   -> TESTXML  [label="lws"]
1163
TESTXML   -> NOTXML
1164
TESTXML   -> STARTTAGa  [label="'<'"]
1165
1166
#################################################################
1167
# SECTION B
1168
1169
NOTMYTAG  [shape=ellipse]
1170
NEXTTAG   [shape=ellipse]
1171
NOTMYTAG  -> NEXTTAG  [style=dotted, label="syntax:2"]
1172
STARTTAGb [shape=hexagon, label="STARTTAG"]
1173
NOTMYTAG  -> NEXTTAG  [label="'>'"]
1174
NOTMYTAG  -> NOTMYTAG [label="*"]
1175
NEXTTAG   -> NEXTTAG  [label="'-->'"]
1176
NEXTTAG   -> NEXTTAG  [label="*"]
1177
NEXTTAG   -> STARTTAGb  [label="'<'"]
1178
1179
#################################################################
1180
# SECTION C
1181
1182
STARTTAG  [shape=ellipse]
1183
COMMENT   [shape=ellipse]
1184
CDATA   [shape=ellipse]
1185
ESITAG    [shape=ellipse]
1186
ESIETAG   [shape=ellipse]
1187
ESIINCLUDE  [shape=ellipse]
1188
ESIREMOVE [shape=ellipse]
1189
ESICOMMENT  [shape=ellipse]
1190
ESIBOGON  [shape=ellipse]
1191
INTAGc    [shape=hexagon, label="INTAG"]
1192
NOTMYTAGc [shape=hexagon, label="NOTMYTAG"]
1193
NEXTTAGc  [shape=hexagon, label="NEXTTAG"]
1194
TAGERRORc [shape=hexagon, label="TAGERROR"]
1195
C1    [shape=circle,label=""]
1196
STARTTAG  -> COMMENT  [label="'<!--'"]
1197
STARTTAG  -> ESITAG [label="'<esi'"]
1198
STARTTAG  -> CDATA  [label="'<![CDATA['"]
1199
STARTTAG  -> NOTMYTAGc  [label="'*'"]
1200
COMMENT   -> NEXTTAGc [label="'esi'"]
1201
COMMENT   -> C1   [label="*"]
1202
C1    -> C1   [label="*"]
1203
C1    -> NEXTTAGc [label="-->"]
1204
CDATA   -> CDATA  [label="*"]
1205
CDATA   -> NEXTTAGc [label="]]>"]
1206
ESITAG    -> ESIINCLUDE [label="'include'"]
1207
ESITAG    -> ESIREMOVE  [label="'remove'"]
1208
ESITAG    -> ESICOMMENT [label="'comment'"]
1209
ESITAG    -> ESIBOGON [label="*"]
1210
ESICOMMENT  -> INTAGc
1211
ESICOMMENT  -> TAGERRORc
1212
ESICOMMENT  -> TAGERRORc  [style=dotted, label="nested\nin\nremove"]
1213
ESIREMOVE -> INTAGc
1214
ESIREMOVE -> TAGERRORc
1215
ESIINCLUDE  -> INTAGc
1216
ESIINCLUDE  -> TAGERRORc
1217
ESIINCLUDE  -> TAGERRORc  [style=dotted, label="nested\nin\nremove"]
1218
ESIBOGON  -> TAGERRORc
1219
1220
#################################################################
1221
# SECTION D
1222
1223
INTAG   [shape=ellipse]
1224
TAGERROR  [shape=ellipse]
1225
NEXTTAGd  [shape=hexagon, label="NEXTTAG"]
1226
ATTRd   [shape=hexagon, label="ATTR"]
1227
D1    [shape=circle, label=""]
1228
D2    [shape=circle, label=""]
1229
INTAG   -> D1   [label="lws"]
1230
D1    -> D2   [label="/"]
1231
INTAG   -> D2   [label="/"]
1232
INTAG   -> NEXTTAGd [label=">"]
1233
D1    -> NEXTTAGd [label=">"]
1234
D2    -> NEXTTAGd [label=">"]
1235
D1    -> ATTRd  [label="XMLstartchar"]
1236
D1    -> TAGERROR [label="*"]
1237
D2    -> TAGERROR [label="*"]
1238
TAGERROR  -> TAGERROR [label="*"]
1239
TAGERROR  -> NEXTTAGd [label="'>'"]
1240
1241
#################################################################
1242
# SECTION E
1243
1244
ATTR    [shape=ellipse]
1245
SKIPATTR  [shape=ellipse]
1246
ATTRGETVAL  [shape=ellipse]
1247
ATTRDELIM [shape=ellipse]
1248
ATTRVAL   [shape=ellipse]
1249
TAGERRORe [shape=hexagon, label="TAGERROR"]
1250
INTAGe    [shape=hexagon, label="INTAG"]
1251
ATTR    -> SKIPATTR [label="*"]
1252
ATTR    -> ATTRGETVAL [label="wanted attr"]
1253
SKIPATTR  -> SKIPATTR [label="XMLname"]
1254
SKIPATTR  -> ATTRDELIM  [label="'='"]
1255
SKIPATTR  -> INTAGe [label="'>'"]
SKIPATTR  -> INTAGe [label="'/'"]
SKIPATTR  -> INTAGe [label="lws"]
SKIPATTR  -> TAGERRORe  [label="*"]
1256
ATTRGETVAL  -> ATTRDELIM
1257
ATTRDELIM -> ATTRVAL  [label="\""]
1258
ATTRDELIM -> ATTRVAL  [label="\'"]
1259
ATTRDELIM -> ATTRVAL  [label="*"]
1260
ATTRDELIM -> TAGERRORe  [label="lws"]
1261
ATTRVAL   -> TAGERRORe  [label="'>'"]
1262
ATTRVAL   -> INTAGe [label="delim"]
1263
ATTRVAL   -> ATTRVAL  [label="*"]
1264
1265
}
1266
1267
#endif