Coverage Report

Created: 2026-01-17 06:27

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/varnish-cache/bin/varnishd/cache/cache_esi_parse.c
Line
Count
Source
1
/*-
2
 * Copyright (c) 2011 Varnish Software AS
3
 * All rights reserved.
4
 *
5
 * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
6
 *
7
 * SPDX-License-Identifier: BSD-2-Clause
8
 *
9
 * Redistribution and use in source and binary forms, with or without
10
 * modification, are permitted provided that the following conditions
11
 * are met:
12
 * 1. Redistributions of source code must retain the above copyright
13
 *    notice, this list of conditions and the following disclaimer.
14
 * 2. Redistributions in binary form must reproduce the above copyright
15
 *    notice, this list of conditions and the following disclaimer in the
16
 *    documentation and/or other materials provided with the distribution.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
22
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28
 * SUCH DAMAGE.
29
 *
30
 * VEP Varnish Esi Parsing
31
 */
32
33
#include "config.h"
34
35
#include "cache_varnishd.h"
36
#include "cache_filter.h"
37
38
#include "cache_vgz.h"
39
#include "cache_esi.h"
40
#include "vct.h"
41
#include "vend.h"
42
#include "vgz.h"
43
44
/*
 * Parser trace hook.  As configured here Debug() expands to nothing;
 * swap in the commented-out definition below to print the trace
 * with printf().
 */
//#define Debug(fmt, ...) printf(fmt, __VA_ARGS__)
45
#define Debug(fmt, ...) /**/
46
47
struct vep_state;
48
49
/*
 * Reason a tag handler is being invoked:
 *   DO_ATTR  an attribute value has just been collected,
 *   DO_TAG   the closing '>' of the tag has been reached.
 */
enum dowhat {DO_ATTR, DO_TAG};
50
/* Signature of the per-tag handlers stored in vep_state.dostuff */
typedef void dostuff_f(struct vep_state *, enum dowhat);
51
52
/*
 * One row of a match table.  A table is an array of these, terminated
 * by a row whose match is NULL; that row supplies the state to enter
 * when none of the literals match.
 */
struct vep_match {
53
  /* Literal to compare against the input, NULL in the last row */
  const char  *match;
54
  /* Points at the VEP_* state constant to switch to on a hit */
  const char  * const *state;
55
};
56
57
/*
 * How a run of input bytes is accounted for in the emitted VEC string:
 * VERBATIM runs produce a verbatim record, SKIP runs a skip record.
 */
enum vep_mark { VERBATIM = 0, SKIP };
58
59
/*
 * Complete state of one ESI parse: output buffer, callback hooks,
 * state-machine position and the byte/CRC accounting used when
 * emitting VEC records.
 */
struct vep_state {
60
  /* Checked against VEP_MAGIC on entry to VEP_Parse() */
  unsigned    magic;
61
#define VEP_MAGIC   0x55cb9b82
62
  /* VEC records are appended here by the emit functions */
  struct vsb    *vsb;
63
64
  /* Scanned by vep_do_include() to build the prefix of relative src= */
  const char    *url;
65
  /* Used for logging (vc->wrk->vsl) and handed to the callback */
  struct vfp_ctx    *vc;
66
  /* Non-zero: verbatim records are followed by a length + CRC record */
  int     dogzip;
67
  /*
   * Called from vep_mark_common() with a byte count and a VGZ_* flag;
   * its return value is used as the running output offset.
   */
  vep_callback_t    *cb;
68
  void      *cb_priv;
69
70
  /* Internal Counter for default call-back function */
71
  ssize_t     cb_x;
72
73
  /* parser state */
74
  /* Current state; compared by pointer against the VEP_* constants */
  const char    *state;
75
  /* Non-zero until the first VEP_Parse() call has done its startup step */
  unsigned    startup;
76
  /* Set once an esi: tag or the end of an <!--esi comment was seen */
  unsigned    esi_found;
77
78
  /* Flags for the tag being parsed: "</esi:", '/' before '>', and
   * whether an attribute may start at the current position */
  unsigned    endtag;
79
  unsigned    emptytag;
80
  unsigned    canattr;
81
82
  /* Non-zero while inside <esi:remove> ... </esi:remove> */
  unsigned    remove;
83
84
  /* Bytes gathered in the current mark mode, not yet emitted */
  ssize_t     o_wait;
85
  /* Bytes recorded by vep_mark_pending(), not yet assigned to a mode */
  ssize_t     o_pending;
86
  /* Sum of all lengths emitted through vep_emit_common() */
  ssize_t     o_total;
87
  /* CRC32 and byte count of the current run */
  uint32_t    crc;
88
  ssize_t     o_crc;
89
  /* CRC32 of the pending bytes */
  uint32_t    crcp;
90
  /* Callback offset at the time of the most recent emit */
  ssize_t     o_last;
91
92
  /* Set to the start of the first input chunk by VEP_Parse();
   * NOTE(review): its consumer is not visible in this part of the file */
  const char    *hack_p;
93
  /* Input position up to which bytes have been accounted for */
  const char    *ver_p;
94
95
  /* Terminator string, cursor into it and follow-up state used with
   * VEP_UNTIL (set up for comments and CDATA) */
  const char    *until;
96
  const char    *until_p;
97
  const char    *until_s;
98
99
  /* Set on entering an esi: tag, cleared when the tag ends */
  int     in_esi_tag;
100
101
  /* Terminator of an open <!--esi comment and the cursor into it;
   * both NULL when no such comment is open */
  const char    *esicmt;
102
  const char    *esicmt_p;
103
104
  /* Attribute match table of the current tag, NULL if it has none */
  struct vep_match  *attr;
105
  /* Collects the value of the attribute being read */
  struct vsb    *attr_vsb;
106
  /* Quote character of the current value, ' ' if unquoted, else 0 */
  int     attr_delim;
107
108
  /* Table currently being matched and the row vep_match() returned */
  struct vep_match  *match;
109
  struct vep_match  *match_hit;
110
111
  /* Holds a partial match that was cut off by the end of the input */
  char      tag[8];
112
  int     tag_i;
113
114
  /* Handler for the tag being parsed */
  dostuff_f   *dostuff;
115
116
  /* src= value and onerror="continue" flag of the current include */
  struct vsb    *include_src;
117
  unsigned    include_continue;
118
119
  /* Call counters for vep_mark_skip/verbatim/pending() */
  unsigned    nm_skip;
120
  unsigned    nm_verbatim;
121
  unsigned    nm_pending;
122
  /* Mode passed to the most recent vep_mark_common() call */
  enum vep_mark   last_mark;
123
};
124
125
/*---------------------------------------------------------------------*/
126
127
/*
 * Parser states.  A state is identified by the address stored in the
 * constant (the parser compares vep->state against these pointers);
 * the string contents are only human-readable names, e.g. for the
 * Debug() trace.
 */
static const char * const VEP_START =   "[Start]";
128
static const char * const VEP_BOM =   "[BOM]";
129
static const char * const VEP_TESTXML =   "[TestXml]";
130
static const char * const VEP_NOTXML =    "[NotXml]";
131
132
static const char * const VEP_NEXTTAG =   "[NxtTag]";
133
static const char * const VEP_NOTMYTAG =  "[NotMyTag]";
134
135
static const char * const VEP_STARTTAG =  "[StartTag]";
136
static const char * const VEP_COMMENTESI =  "[CommentESI]";
137
static const char * const VEP_COMMENT =   "[Comment]";
138
static const char * const VEP_CDATA =   "[CDATA]";
139
static const char * const VEP_ESITAG =    "[ESITag]";
140
static const char * const VEP_ESIENDTAG = "[/ESITag]";
141
142
static const char * const VEP_ESIREMOVE = "[ESI:Remove]";
143
static const char * const VEP_ESIINCLUDE =  "[ESI:Include]";
144
static const char * const VEP_ESICOMMENT =  "[ESI:Comment]";
145
static const char * const VEP_ESIBOGON =  "[ESI:Bogon]";
146
147
static const char * const VEP_INTAG =   "[InTag]";
148
static const char * const VEP_TAGERROR =  "[TagError]";
149
150
static const char * const VEP_ATTR =    "[Attribute]";
151
static const char * const VEP_SKIPATTR =  "[SkipAttribute]";
152
static const char * const VEP_ATTRDELIM = "[AttrDelim]";
153
static const char * const VEP_ATTRGETVAL =  "[AttrGetValue]";
154
static const char * const VEP_ATTRVAL =   "[AttrValue]";
155
156
static const char * const VEP_UNTIL =   "[Until]";
157
static const char * const VEP_MATCHBUF =  "[MatchBuf]";
158
static const char * const VEP_MATCH =   "[Match]";
159
160
/*---------------------------------------------------------------------*/
161
162
/*
 * What may follow a '<'.  vep_match() returns the first row that
 * matches, so literals sharing a prefix must be listed longest first
 * ("!--esi" and "!---->" before "!--").
 */
static struct vep_match vep_match_starttag[] = {
163
  { "!--esi", &VEP_COMMENTESI },
164
  { "!---->", &VEP_NEXTTAG },
165
  { "!--",  &VEP_COMMENT },
166
  { "/esi:",  &VEP_ESIENDTAG },
167
  { "esi:", &VEP_ESITAG },
168
  { "![CDATA[", &VEP_CDATA },
169
  { NULL,   &VEP_NOTMYTAG }
170
};
171
172
/*---------------------------------------------------------------------*/
173
174
/*
 * Element names recognised after "esi:" (or "/esi:"); anything else
 * ends up in VEP_ESIBOGON.
 */
static struct vep_match vep_match_esi[] = {
175
  { "include",  &VEP_ESIINCLUDE },
176
  { "remove", &VEP_ESIREMOVE },
177
  { "comment",  &VEP_ESICOMMENT },
178
  { NULL,   &VEP_ESIBOGON }
179
};
180
181
/*---------------------------------------------------------------------*/
182
183
/*
 * Attributes of <esi:include> whose value is collected and handed to
 * vep_do_include(); any other attribute is skipped.  The literals
 * include the '=' and must stay in sync with the strcmp() calls in
 * vep_do_include().
 */
static struct vep_match vep_match_attr_include[] = {
184
  { "src=", &VEP_ATTRGETVAL },
185
  { "onerror=", &VEP_ATTRGETVAL },
186
  { NULL,   &VEP_SKIPATTR }
187
};
188
189
/*---------------------------------------------------------------------*/
190
191
/*
 * Byte sequence swallowed at the very start of the object when the
 * esi_remove_bom feature is on; on a hit the parser re-enters
 * VEP_START, otherwise it continues in VEP_BOM.
 *
 * NOTE(review): the UTF-8 byte order mark is EF BB BF, but both this
 * table and the first-byte test in VEP_Parse()'s VEP_START/VEP_TESTXML
 * handling use 0xEB.  Confirm whether that is intended; if it is
 * changed, the table and those tests must be changed together.
 */
static struct vep_match vep_match_bom[] = {
192
  { "\xeb\xbb\xbf", &VEP_START },
193
  { NULL,     &VEP_BOM }
194
};
195
196
/*--------------------------------------------------------------------
197
 * Report a parsing error
198
 */
199
200
static void
201
vep_error(const struct vep_state *vep, const char *p)
202
16.6k
{
203
16.6k
  VSC_C_main->esi_errors++;
204
16.6k
  VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror, "ERR: after %zd %s",
205
16.6k
       vep->o_last, p);
206
16.6k
}
207
208
/*--------------------------------------------------------------------
209
 * Report a parsing warning
210
 */
211
212
static void
213
vep_warn(const struct vep_state *vep, const char *p)
214
10.8k
{
215
10.8k
  VSC_C_main->esi_warnings++;
216
10.8k
  VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror, "WARN: after %zd %s",
217
10.8k
       vep->o_last, p);
218
10.8k
}
219
220
/*---------------------------------------------------------------------
221
 * return match or NULL if more input needed.
222
 */
223
224
static struct vep_match *
225
vep_match(const struct vep_state *vep, const char *b, const char *e)
226
149k
{
227
149k
  struct vep_match *vm;
228
149k
  const char *q, *r;
229
230
149k
  AN(vep->match);
231
383k
  for (vm = vep->match; vm->match != NULL; vm++) {
232
356k
    assert(strlen(vm->match) <= sizeof (vep->tag));
233
356k
    r = b;
234
1.05M
    for (q = vm->match; *q != '\0' && r < e; q++, r++)
235
933k
      if (*q != *r)
236
234k
        break;
237
356k
    if (*q == '\0')
238
122k
      break;
239
234k
    if (r == e)
240
46
      return (NULL);
241
234k
  }
242
149k
  return (vm);
243
149k
}
244
245
/*---------------------------------------------------------------------
246
 *
247
 */
248
249
static void
250
vep_emit_len(const struct vep_state *vep, ssize_t l, int m8, int m16, int m64)
251
254k
{
252
254k
  uint8_t buf[9];
253
254
254k
  assert(l > 0);
255
254k
  if (l < 256) {
256
253k
    buf[0] = (uint8_t)m8;
257
253k
    buf[1] = (uint8_t)l;
258
253k
    assert((ssize_t)buf[1] == l);
259
253k
    VSB_bcat(vep->vsb, buf, 2);
260
253k
  } else if (l < 65536) {
261
1.12k
    buf[0] = (uint8_t)m16;
262
1.12k
    vbe16enc(buf + 1, (uint16_t)l);
263
1.12k
    assert((ssize_t)vbe16dec(buf + 1) == l);
264
1.12k
    VSB_bcat(vep->vsb, buf, 3);
265
1.12k
  } else {
266
145
    buf[0] = (uint8_t)m64;
267
145
    vbe64enc(buf + 1, l);
268
145
    assert((ssize_t)vbe64dec(buf + 1) == l);
269
145
    VSB_bcat(vep->vsb, buf, 9);
270
145
  }
271
254k
}
272
273
static void
274
vep_emit_skip(const struct vep_state *vep, ssize_t l)
275
137k
{
276
277
137k
  vep_emit_len(vep, l, VEC_S1, VEC_S2, VEC_S8);
278
137k
}
279
280
static void
281
vep_emit_verbatim(const struct vep_state *vep, ssize_t l, ssize_t l_crc)
282
116k
{
283
116k
  uint8_t buf[4];
284
285
116k
  vep_emit_len(vep, l, VEC_V1, VEC_V2, VEC_V8);
286
116k
  if (vep->dogzip) {
287
0
    vep_emit_len(vep, l_crc, VEC_C1, VEC_C2, VEC_C8);
288
0
    vbe32enc(buf, vep->crc);
289
0
    VSB_bcat(vep->vsb, buf, sizeof buf);
290
0
  }
291
116k
}
292
293
static void
294
vep_emit_common(struct vep_state *vep, ssize_t l, enum vep_mark mark)
295
255k
{
296
297
255k
  assert(l >= 0);
298
255k
  if (l == 0)
299
1.52k
    return;
300
254k
  assert(mark == SKIP || mark == VERBATIM);
301
254k
  if (mark == SKIP)
302
137k
    vep_emit_skip(vep, l);
303
116k
  else
304
116k
    vep_emit_verbatim(vep, l, vep->o_crc);
305
306
254k
  vep->crc = crc32(0L, Z_NULL, 0);
307
254k
  vep->o_crc = 0;
308
254k
  vep->o_total += l;
309
254k
}
310
311
/*---------------------------------------------------------------------
312
 *
313
 */
314
315
static void
316
vep_mark_common(struct vep_state *vep, const char *p, enum vep_mark mark)
317
326k
{
318
326k
  ssize_t l, lcb;
319
320
326k
  assert(mark == SKIP || mark == VERBATIM);
321
322
  /* The NO-OP case, no data, no pending data & no change of mode */
323
326k
  if (vep->last_mark == mark && p == vep->ver_p && vep->o_pending == 0)
324
0
    return;
325
326
  /*
327
   * If we changed mode, emit whatever the opposite mode
328
   * assembled before the pending bytes.
329
   */
330
331
326k
  if (vep->last_mark != mark && (vep->o_wait > 0 || vep->startup)) {
332
254k
    lcb = vep->cb(vep->vc, vep->cb_priv, 0,
333
254k
        mark == VERBATIM ? VGZ_RESET : VGZ_ALIGN);
334
254k
    vep_emit_common(vep, lcb - vep->o_last, vep->last_mark);
335
254k
    vep->o_last = lcb;
336
254k
    vep->o_wait = 0;
337
254k
  }
338
339
  /* Transfer pending bytes CRC into active mode CRC */
340
326k
  if (vep->o_pending) {
341
217
    (void)vep->cb(vep->vc, vep->cb_priv, vep->o_pending,
342
217
         VGZ_NORMAL);
343
217
    if (vep->o_crc == 0) {
344
56
      vep->crc = vep->crcp;
345
56
      vep->o_crc = vep->o_pending;
346
161
    } else {
347
161
      vep->crc = crc32_combine(vep->crc,
348
161
          vep->crcp, vep->o_pending);
349
161
      vep->o_crc += vep->o_pending;
350
161
    }
351
217
    vep->crcp = crc32(0L, Z_NULL, 0);
352
217
    vep->o_wait += vep->o_pending;
353
217
    vep->o_pending = 0;
354
217
  }
355
356
  /* * Process this bit of input */
357
326k
  AN(vep->ver_p);
358
326k
  l = p - vep->ver_p;
359
326k
  assert(l >= 0);
360
326k
  vep->crc = crc32(vep->crc, (const void*)vep->ver_p, l);
361
326k
  vep->o_crc += l;
362
326k
  vep->ver_p = p;
363
364
326k
  vep->o_wait += l;
365
326k
  vep->last_mark = mark;
366
326k
  (void)vep->cb(vep->vc, vep->cb_priv, l, VGZ_NORMAL);
367
326k
}
368
369
static void
370
vep_mark_verbatim(struct vep_state *vep, const char *p)
371
160k
{
372
373
160k
  vep_mark_common(vep, p, VERBATIM);
374
160k
  vep->nm_verbatim++;
375
160k
}
376
377
static void
378
vep_mark_skip(struct vep_state *vep, const char *p)
379
163k
{
380
381
163k
  vep_mark_common(vep, p, SKIP);
382
163k
  vep->nm_skip++;
383
163k
}
384
385
static void
386
vep_mark_pending(struct vep_state *vep, const char *p)
387
217
{
388
217
  ssize_t l;
389
390
217
  AN(vep->ver_p);
391
217
  l = p - vep->ver_p;
392
217
  assert(l > 0);
393
217
  vep->crcp = crc32(vep->crcp, (const void *)vep->ver_p, l);
394
217
  vep->ver_p = p;
395
396
217
  vep->o_pending += l;
397
217
  vep->nm_pending++;
398
217
}
399
400
/*---------------------------------------------------------------------
401
 */
402
403
static void v_matchproto_()
404
vep_do_comment(struct vep_state *vep, enum dowhat what)
405
496
{
406
496
  Debug("DO_COMMENT(%d)\n", what);
407
496
  assert(what == DO_TAG);
408
496
  if (!vep->emptytag)
409
217
    vep_error(vep, "ESI 1.0 <esi:comment> needs final '/'");
410
496
}
411
412
/*---------------------------------------------------------------------
413
 */
414
415
static void v_matchproto_()
416
vep_do_remove(struct vep_state *vep, enum dowhat what)
417
0
{
418
0
  Debug("DO_REMOVE(%d, end %d empty %d remove %d)\n",
419
0
      what, vep->endtag, vep->emptytag, vep->remove);
420
0
  assert(what == DO_TAG);
421
0
  if (vep->emptytag)
422
0
    vep_error(vep, "ESI 1.0 <esi:remove/> not legal");
423
0
  else if (vep->remove && !vep->endtag)
424
0
    vep_error(vep, "ESI 1.0 <esi:remove> already open");
425
0
  else if (!vep->remove && vep->endtag)
426
0
    vep_error(vep, "ESI 1.0 <esi:remove> not open");
427
0
  else
428
0
    vep->remove = !vep->endtag;
429
0
}
430
431
/*---------------------------------------------------------------------
432
 */
433
434
static void
435
include_attr_src(struct vep_state *vep)
436
15.3k
{
437
15.3k
  const char *p;
438
439
15.3k
  if (vep->include_src != NULL) {
440
2.30k
    vep_error(vep,
441
2.30k
        "ESI 1.0 <esi:include> "
442
2.30k
        "has multiple src= attributes");
443
2.30k
    vep->state = VEP_TAGERROR;
444
2.30k
    VSB_destroy(&vep->attr_vsb);
445
2.30k
    VSB_destroy(&vep->include_src);
446
2.30k
    return;
447
2.30k
  }
448
15.1M
  for (p = VSB_data(vep->attr_vsb); *p != '\0'; p++)
449
15.1M
    if (vct_islws(*p))
450
1.69k
      break;
451
13.0k
  if (*p != '\0') {
452
1.69k
    vep_error(vep,
453
1.69k
        "ESI 1.0 <esi:include> "
454
1.69k
        "has whitespace in src= attribute");
455
1.69k
    vep->state = VEP_TAGERROR;
456
1.69k
    VSB_destroy(&vep->attr_vsb);
457
1.69k
    if (vep->include_src != NULL)
458
0
      VSB_destroy(&vep->include_src);
459
1.69k
    return;
460
1.69k
  }
461
11.3k
  vep->include_src = vep->attr_vsb;
462
11.3k
  vep->attr_vsb = NULL;
463
11.3k
}
464
465
static void
466
include_attr_onerror(struct vep_state *vep)
467
0
{
468
469
0
  vep->include_continue = !strcmp("continue", VSB_data(vep->attr_vsb));
470
0
  VSB_destroy(&vep->attr_vsb);
471
0
}
472
473
static void v_matchproto_()
474
vep_do_include(struct vep_state *vep, enum dowhat what)
475
25.3k
{
476
25.3k
  const char *p, *q, *h;
477
25.3k
  ssize_t l;
478
25.3k
  char incl;
479
480
25.3k
  Debug("DO_INCLUDE(%d)\n", what);
481
25.3k
  if (what == DO_ATTR) {
482
15.3k
    Debug("ATTR (%s) (%s)\n", vep->match_hit->match,
483
15.3k
      VSB_data(vep->attr_vsb));
484
15.3k
    if (!strcmp("src=", vep->match_hit->match)) {
485
15.3k
      include_attr_src(vep);
486
15.3k
      return;
487
15.3k
    }
488
0
    if (!strcmp("onerror=", vep->match_hit->match)) {
489
0
      include_attr_onerror(vep);
490
0
      return;
491
0
    }
492
0
    WRONG("Unhandled <esi:include> attribute");
493
0
  }
494
10.0k
  assert(what == DO_TAG);
495
10.0k
  if (!vep->emptytag)
496
9.45k
    vep_warn(vep, "ESI 1.0 <esi:include> lacks final '/'");
497
10.0k
  if (vep->include_src == NULL) {
498
1.05k
    vep_error(vep, "ESI 1.0 <esi:include> lacks src attr");
499
1.05k
    return;
500
1.05k
  }
501
502
  /*
503
   * Strictly speaking, we ought to spit out any piled up skip before
504
   * emitting the VEC for the include, but objectively that makes no
505
   * difference and robs us of a chance to collapse another skip into
506
   * this on so we don't do that.
507
   * However, we cannot tolerate any verbatim stuff piling up.
508
   * The mark_skip() before calling dostuff should have taken
509
   * care of that.  Make sure.
510
   */
511
8.97k
  assert(vep->o_wait == 0 || vep->last_mark == SKIP);
512
  /* XXX: what if it contains NUL bytes ?? */
513
8.97k
  p = VSB_data(vep->include_src);
514
8.97k
  l = VSB_len(vep->include_src);
515
8.97k
  h = 0;
516
517
8.97k
  incl = vep->include_continue ? VEC_IC : VEC_IA;
518
519
8.97k
  if (l > 7 && !memcmp(p, "http://", 7)) {
520
3.65k
    h = p + 7;
521
3.65k
    p = strchr(h, '/');
522
3.65k
    if (p == NULL) {
523
332
      vep_error(vep,
524
332
          "ESI 1.0 <esi:include> invalid src= URL");
525
332
      vep->state = VEP_TAGERROR;
526
332
      AZ(vep->attr_vsb);
527
332
      VSB_destroy(&vep->include_src);
528
332
      return;
529
332
    }
530
3.32k
    Debug("HOST <%.*s> PATH <%s>\n", (int)(p-h),h, p);
531
3.32k
    VSB_printf(vep->vsb, "%c", incl);
532
3.32k
    VSB_printf(vep->vsb, "Host: %.*s%c", (int)(p-h), h, 0);
533
5.32k
  } else if (l > 8 && !memcmp(p, "https://", 8)) {
534
1.44k
    if (!FEATURE(FEATURE_ESI_IGNORE_HTTPS)) {
535
560
      vep_warn(vep,
536
560
          "ESI 1.0 <esi:include> with https:// ignored");
537
560
      vep->state = VEP_TAGERROR;
538
560
      AZ(vep->attr_vsb);
539
560
      VSB_destroy(&vep->include_src);
540
560
      return;
541
560
    }
542
885
    vep_warn(vep,
543
885
        "ESI 1.0 <esi:include> https:// treated as http://");
544
885
    h = p + 8;
545
885
    p = strchr(h, '/');
546
885
    if (p == NULL) {
547
552
      vep_error(vep,
548
552
          "ESI 1.0 <esi:include> invalid src= URL");
549
552
      vep->state = VEP_TAGERROR;
550
552
      AZ(vep->attr_vsb);
551
552
      VSB_destroy(&vep->include_src);
552
552
      return;
553
552
    }
554
333
    VSB_printf(vep->vsb, "%c", incl);
555
333
    VSB_printf(vep->vsb, "Host: %.*s%c", (int)(p-h), h, 0);
556
3.87k
  } else if (*p == '/') {
557
754
    VSB_printf(vep->vsb, "%c", incl);
558
754
    VSB_printf(vep->vsb, "%c", 0);
559
3.12k
  } else {
560
3.12k
    VSB_printf(vep->vsb, "%c", incl);
561
3.12k
    VSB_printf(vep->vsb, "%c", 0);
562
    /* Look for the last / before a '?' */
563
3.12k
    h = NULL;
564
6.24k
    for (q = vep->url; *q && *q != '?'; q++)
565
3.12k
      if (*q == '/')
566
3.12k
        h = q;
567
3.12k
    if (h == NULL)
568
0
      h = q + 1;
569
570
3.12k
    Debug("INCL:: [%.*s]/[%s]\n",
571
3.12k
        (int)(h - vep->url), vep->url, p);
572
3.12k
    VSB_printf(vep->vsb, "%.*s/", (int)(h - vep->url), vep->url);
573
3.12k
  }
574
7.53k
  l -= (p - VSB_data(vep->include_src));
575
7.99M
  for (q = p; *q != '\0'; ) {
576
7.99M
    if (*q == '&') {
577
12.7k
#define R(w,f,r)              \
578
54.1k
      if (q + w <= p + l && !memcmp(q, f, w)) { \
579
7.17k
        VSB_printf(vep->vsb, "%c", r);  \
580
7.17k
        q += w;       \
581
7.17k
        continue;     \
582
7.17k
      }
583
12.7k
      R(6, "&apos;", '\'');
584
12.2k
      R(6, "&quot;", '"');
585
11.8k
      R(4, "&lt;", '<');
586
11.5k
      R(4, "&gt;", '>');
587
5.85k
      R(5, "&amp;", '&');
588
5.55k
    }
589
7.98M
    VSB_printf(vep->vsb, "%c", *q++);
590
7.98M
  }
591
7.53k
#undef R
592
7.53k
  VSB_printf(vep->vsb, "%c", 0);
593
7.53k
  VSB_destroy(&vep->include_src);
594
7.53k
  vep->include_continue = 0;
595
7.53k
}
596
597
/*---------------------------------------------------------------------
598
 * Lex/Parse object for ESI instructions
599
 *
600
 * This function is called with the input object piecemeal so do not
601
 * assume that we have more than one char available at a time, but
602
 * optimize for getting huge chunks.
603
 *
604
 * NB: At the bottom of this source-file, there is a dot-diagram matching
605
 * NB: the state-machine.  Please maintain it along with the code.
606
 */
607
608
void
609
VEP_Parse(struct vep_state *vep, const char *p, size_t l)
610
1.52k
{
611
1.52k
  const char *e;
612
1.52k
  struct vep_match *vm;
613
1.52k
  int i;
614
615
1.52k
  CHECK_OBJ_NOTNULL(vep, VEP_MAGIC);
616
1.52k
  assert(l > 0);
617
618
1.52k
  if (vep->startup) {
619
    /*
620
     * We must force the GZIP header out as a SKIP string,
621
     * otherwise an object starting with <esi:include would
622
     * have its GZIP header appear after the included object
623
     * (e000026.vtc)
624
     */
625
1.52k
    vep->ver_p = "";
626
1.52k
    vep->last_mark = SKIP;
627
1.52k
    vep_mark_common(vep, vep->ver_p, VERBATIM);
628
1.52k
    vep->startup = 0;
629
1.52k
    AZ(vep->hack_p);
630
1.52k
    vep->hack_p = p;
631
1.52k
  }
632
633
1.52k
  vep->ver_p = p;
634
635
1.52k
  e = p + l;
636
637
626k
  while (p < e) {
638
624k
    AN(vep->state);
639
624k
    Debug("EP %s %d (%.*s) [%.*s]\n",
640
624k
        vep->state,
641
624k
        vep->remove,
642
624k
        vep->tag_i, vep->tag,
643
624k
        (e - p) > 10 ? 10 : (int)(e-p), p);
644
624k
    assert(p >= vep->ver_p);
645
646
    /******************************************************
647
     * SECTION A
648
     */
649
650
624k
    if (vep->state == VEP_START) {
651
3.32k
      if (FEATURE(FEATURE_ESI_REMOVE_BOM) &&
652
3.23k
          *p == (char)0xeb) {
653
1.83k
        vep->match = vep_match_bom;
654
1.83k
        vep->state = VEP_MATCH;
655
1.83k
      } else
656
1.48k
        vep->state = VEP_BOM;
657
621k
    } else if (vep->state == VEP_BOM) {
658
1.50k
      vep_mark_skip(vep, p);
659
1.50k
      if (FEATURE(FEATURE_ESI_DISABLE_XML_CHECK))
660
1.42k
        vep->state = VEP_NEXTTAG;
661
71
      else
662
71
        vep->state = VEP_TESTXML;
663
619k
    } else if (vep->state == VEP_TESTXML) {
664
      /*
665
       * If the first non-whitespace char is different
666
       * from '<' we assume this is not XML.
667
       */
668
825
      while (p < e && vct_islws(*p))
669
754
        p++;
670
71
      vep_mark_verbatim(vep, p);
671
71
      if (p < e && *p == '<') {
672
9
        p++;
673
9
        vep->state = VEP_STARTTAG;
674
62
      } else if (p < e && *p == (char)0xeb) {
675
3
        VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror,
676
3
            "WARN: No ESI processing, "
677
3
            "first char not '<' but BOM."
678
3
            " (See feature esi_remove_bom)"
679
3
        );
680
3
        vep->state = VEP_NOTXML;
681
59
      } else if (p < e) {
682
50
        VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror,
683
50
            "WARN: No ESI processing, "
684
50
            "first char not '<'."
685
50
            " (See feature esi_disable_xml_check)"
686
50
        );
687
50
        vep->state = VEP_NOTXML;
688
50
      }
689
619k
    } else if (vep->state == VEP_NOTXML) {
690
      /*
691
       * This is not recognized as XML, just skip thru
692
       * vfp_esi_end() will handle the rest
693
       */
694
53
      p = e;
695
53
      vep_mark_verbatim(vep, p);
696
697
    /******************************************************
698
     * SECTION B
699
     */
700
701
619k
    } else if (vep->state == VEP_NOTMYTAG) {
702
18.0k
      if (FEATURE(FEATURE_ESI_IGNORE_OTHER_ELEMENTS)) {
703
17.4k
        p++;
704
17.4k
        vep->state = VEP_NEXTTAG;
705
17.4k
      } else {
706
516
        vep->tag_i = 0;
707
31.7k
        while (p < e) {
708
31.6k
          if (*p++ == '>') {
709
506
            vep->state = VEP_NEXTTAG;
710
506
            break;
711
506
          }
712
31.6k
        }
713
516
      }
714
18.0k
      if (p == e && !vep->remove)
715
41
        vep_mark_verbatim(vep, p);
716
601k
    } else if (vep->state == VEP_NEXTTAG) {
717
      /*
718
       * Hunt for start of next tag and keep an eye
719
       * out for end of EsiCmt if armed.
720
       */
721
102k
      vep->emptytag = 0;
722
102k
      vep->attr = NULL;
723
102k
      vep->dostuff = NULL;
724
14.7M
      while (p < e && *p != '<') {
725
14.6M
        if (vep->esicmt_p == NULL) {
726
13.9M
          p++;
727
13.9M
          continue;
728
13.9M
        }
729
683k
        if (*p != *vep->esicmt_p) {
730
512k
          p++;
731
512k
          vep->esicmt_p = vep->esicmt;
732
512k
          continue;
733
512k
        }
734
171k
        if (!vep->remove && vep->esicmt_p == vep->esicmt)
735
58.6k
          vep_mark_verbatim(vep, p);
736
171k
        p++;
737
171k
        if (*++vep->esicmt_p == '\0') {
738
55.9k
          vep->esi_found = 1;
739
55.9k
          vep->esicmt = NULL;
740
55.9k
          vep->esicmt_p = NULL;
741
          /*
742
           * The end of the esicmt
743
           * should not be emitted.
744
           * But the stuff before should
745
           */
746
55.9k
          vep_mark_skip(vep, p);
747
55.9k
        }
748
171k
      }
749
102k
      if (p < e) {
750
101k
        if (!vep->remove)
751
101k
          vep_mark_verbatim(vep, p);
752
101k
        assert(*p == '<');
753
101k
        p++;
754
101k
        vep->state = VEP_STARTTAG;
755
101k
      } else if (vep->esicmt_p == vep->esicmt && !vep->remove)
756
271
        vep_mark_verbatim(vep, p);
757
758
    /******************************************************
759
     * SECTION C
760
     */
761
762
499k
    } else if (vep->state == VEP_STARTTAG) {
763
      /* Start of tag, set up match table */
764
101k
      vep->endtag = 0;
765
101k
      vep->match = vep_match_starttag;
766
101k
      vep->state = VEP_MATCH;
767
397k
    } else if (vep->state == VEP_COMMENT) {
768
1.55k
      vep->esicmt_p = vep->esicmt = NULL;
769
1.55k
      vep->until_p = vep->until = "-->";
770
1.55k
      vep->until_s = VEP_NEXTTAG;
771
1.55k
      vep->state = VEP_UNTIL;
772
396k
    } else if (vep->state == VEP_COMMENTESI) {
773
56.7k
      if (vep->remove)
774
0
        vep_error(vep,
775
0
            "ESI 1.0 Nested <!--esi"
776
0
            " element in <esi:remove>");
777
56.7k
      vep->esicmt_p = vep->esicmt = "-->";
778
56.7k
      vep->state = VEP_NEXTTAG;
779
56.7k
      vep_mark_skip(vep, p);
780
339k
    } else if (vep->state == VEP_CDATA) {
781
      /*
782
       * Easy: just look for the end of CDATA
783
       */
784
294
      vep->until_p = vep->until = "]]>";
785
294
      vep->until_s = VEP_NEXTTAG;
786
294
      vep->state = VEP_UNTIL;
787
339k
    } else if (vep->state == VEP_ESIENDTAG) {
788
1.00k
      vep->endtag = 1;
789
1.00k
      vep->state = VEP_ESITAG;
790
338k
    } else if (vep->state == VEP_ESITAG) {
791
24.6k
      vep->in_esi_tag = 1;
792
24.6k
      vep->esi_found = 1;
793
24.6k
      vep_mark_skip(vep, p);
794
24.6k
      vep->match = vep_match_esi;
795
24.6k
      vep->state = VEP_MATCH;
796
313k
    } else if (vep->state == VEP_ESIINCLUDE) {
797
19.5k
      if (vep->remove) {
798
0
        vep_error(vep,
799
0
            "ESI 1.0 <esi:include> element"
800
0
            " nested in <esi:remove>");
801
0
        vep->state = VEP_TAGERROR;
802
19.5k
      } else if (vep->endtag) {
803
308
        vep_error(vep,
804
308
            "ESI 1.0 </esi:include> illegal end-tag");
805
308
        vep->state = VEP_TAGERROR;
806
19.2k
      } else {
807
19.2k
        vep->dostuff = vep_do_include;
808
19.2k
        vep->state = VEP_INTAG;
809
19.2k
        vep->attr = vep_match_attr_include;
810
19.2k
      }
811
293k
    } else if (vep->state == VEP_ESIREMOVE) {
812
0
      vep->dostuff = vep_do_remove;
813
0
      vep->state = VEP_INTAG;
814
293k
    } else if (vep->state == VEP_ESICOMMENT) {
815
884
      if (vep->remove) {
816
0
        vep_error(vep,
817
0
            "ESI 1.0 <esi:comment> element"
818
0
            " nested in <esi:remove>");
819
0
        vep->state = VEP_TAGERROR;
820
884
      } else if (vep->endtag) {
821
316
        vep_error(vep,
822
316
            "ESI 1.0 </esi:comment> illegal end-tag");
823
316
        vep->state = VEP_TAGERROR;
824
568
      } else {
825
568
        vep->dostuff = vep_do_comment;
826
568
        vep->state = VEP_INTAG;
827
568
      }
828
292k
    } else if (vep->state == VEP_ESIBOGON) {
829
4.16k
      vep_error(vep,
830
4.16k
          "ESI 1.0 <esi:bogus> element");
831
4.16k
      vep->state = VEP_TAGERROR;
832
833
    /******************************************************
834
     * SECTION D
835
     */
836
837
288k
    } else if (vep->state == VEP_INTAG) {
838
36.6k
      vep->tag_i = 0;
839
44.4k
      while (p < e && vct_islws(*p) && !vep->emptytag) {
840
7.79k
        p++;
841
7.79k
        vep->canattr = 1;
842
7.79k
      }
843
36.6k
      if (p < e && *p == '/' && !vep->emptytag) {
844
1.50k
        p++;
845
1.50k
        vep->emptytag = 1;
846
1.50k
        vep->canattr = 0;
847
1.50k
      }
848
36.6k
      if (p < e && *p == '>') {
849
10.5k
        p++;
850
10.5k
        AN(vep->dostuff);
851
10.5k
        vep_mark_skip(vep, p);
852
10.5k
        vep->dostuff(vep, DO_TAG);
853
10.5k
        vep->in_esi_tag = 0;
854
10.5k
        vep->state = VEP_NEXTTAG;
855
26.1k
      } else if (p < e && vep->emptytag) {
856
639
        vep_error(vep,
857
639
            "XML 1.0 '>' does not follow '/' in tag");
858
639
        vep->state = VEP_TAGERROR;
859
25.4k
      } else if (p < e && vep->canattr &&
860
24.7k
          vct_isxmlnamestart(*p)) {
861
23.8k
        vep->state = VEP_ATTR;
862
23.8k
      } else if (p < e) {
863
1.61k
        vep_error(vep,
864
1.61k
            "XML 1.0 Illegal attribute start char");
865
1.61k
        vep->state = VEP_TAGERROR;
866
1.61k
      }
867
252k
    } else if (vep->state == VEP_TAGERROR) {
868
3.21M
      while (p < e && *p != '>')
869
3.20M
        p++;
870
13.8k
      if (p < e) {
871
13.7k
        p++;
872
13.7k
        vep_mark_skip(vep, p);
873
13.7k
        vep->in_esi_tag = 0;
874
13.7k
        vep->state = VEP_NEXTTAG;
875
13.7k
        if (vep->attr_vsb)
876
410
          VSB_destroy(&vep->attr_vsb);
877
13.7k
      }
878
879
    /******************************************************
880
     * SECTION E
881
     */
882
883
238k
    } else if (vep->state == VEP_ATTR) {
884
23.8k
      AZ(vep->attr_delim);
885
23.8k
      if (vep->attr == NULL) {
886
2.76k
        p++;
887
2.76k
        AZ(vep->attr_vsb);
888
2.76k
        vep->state = VEP_SKIPATTR;
889
21.0k
      } else {
890
21.0k
        vep->match = vep->attr;
891
21.0k
        vep->state = VEP_MATCH;
892
21.0k
      }
893
214k
    } else if (vep->state == VEP_SKIPATTR) {
894
23.9k
      while (p < e && vct_isxmlname(*p))
895
16.4k
        p++;
896
7.51k
      if (p < e && *p == '=') {
897
3.77k
        p++;
898
3.77k
        vep->state = VEP_ATTRDELIM;
899
3.77k
      } else if (p < e && *p == '>') {
900
383
        vep->state = VEP_INTAG;
901
3.35k
      } else if (p < e && *p == '/') {
902
894
        vep->state = VEP_INTAG;
903
2.45k
      } else if (p < e && vct_issp(*p)) {
904
1.06k
        vep->state = VEP_INTAG;
905
1.38k
      } else if (p < e) {
906
1.37k
        vep_error(vep,
907
1.37k
            "XML 1.0 Illegal attr char");
908
1.37k
        vep->state = VEP_TAGERROR;
909
1.37k
      }
910
206k
    } else if (vep->state == VEP_ATTRGETVAL) {
911
16.3k
      AZ(vep->attr_vsb);
912
16.3k
      vep->attr_vsb = VSB_new_auto();
913
16.3k
      vep->state = VEP_ATTRDELIM;
914
190k
    } else if (vep->state == VEP_ATTRDELIM) {
915
20.0k
      AZ(vep->attr_delim);
916
20.0k
      if (*p == '"' || *p == '\'') {
917
1.37k
        vep->attr_delim = *p++;
918
1.37k
        vep->state = VEP_ATTRVAL;
919
18.7k
      } else if (!vct_issp(*p)) {
920
18.0k
        vep->attr_delim = ' ';
921
18.0k
        vep->state = VEP_ATTRVAL;
922
18.0k
      } else {
923
614
        vep_error(vep,
924
614
            "XML 1.0 Illegal attribute delimiter");
925
614
        vep->state = VEP_TAGERROR;
926
614
      }
927
928
170k
    } else if (vep->state == VEP_ATTRVAL) {
929
22.3M
      while (p < e && *p != '>' && *p != vep->attr_delim &&
930
22.3M
         (vep->attr_delim != ' ' || !vct_issp(*p))) {
931
22.3M
        if (vep->attr_vsb != NULL)
932
22.2M
          VSB_putc(vep->attr_vsb, *p);
933
22.3M
        p++;
934
22.3M
      }
935
19.4k
      if (p < e && *p == '>') {
936
876
        vep_error(vep,
937
876
            "XML 1.0 Missing end attribute delimiter");
938
876
        vep->state = VEP_TAGERROR;
939
876
        vep->attr_delim = 0;
940
876
        if (vep->attr_vsb != NULL) {
941
526
          AZ(VSB_finish(vep->attr_vsb));
942
526
          VSB_destroy(&vep->attr_vsb);
943
526
        }
944
18.5k
      } else if (p < e) {
945
18.4k
        vep->attr_delim = 0;
946
18.4k
        p++;
947
18.4k
        vep->state = VEP_INTAG;
948
18.4k
        if (vep->attr_vsb != NULL) {
949
15.3k
          AZ(VSB_finish(vep->attr_vsb));
950
15.3k
          AN(vep->dostuff);
951
15.3k
          vep->dostuff(vep, DO_ATTR);
952
15.3k
          vep->attr_vsb = NULL;
953
15.3k
        }
954
18.4k
      }
955
956
    /******************************************************
957
     * Utility Section
958
     */
959
960
151k
    } else if (vep->state == VEP_MATCH) {
961
      /*
962
       * Match against a table
963
       */
964
149k
      vm = vep_match(vep, p, e);
965
149k
      vep->match_hit = vm;
966
149k
      if (vm != NULL) {
967
149k
        if (vm->match != NULL)
968
122k
          p += strlen(vm->match);
969
149k
        vep->state = *vm->state;
970
149k
        vep->match = NULL;
971
149k
        vep->tag_i = 0;
972
149k
      } else {
973
46
        assert(p + sizeof(vep->tag) >= e);
974
46
        memcpy(vep->tag, p, e - p);
975
46
        vep->tag_i = e - p;
976
46
        vep->state = VEP_MATCHBUF;
977
46
        p = e;
978
46
      }
979
149k
    } else if (vep->state == VEP_MATCHBUF) {
980
      /*
981
       * Match against a table while split over input
982
       * sections.
983
       */
984
0
      AN(vep->match);
985
0
      i = sizeof(vep->tag) - vep->tag_i;
986
0
      if (i > e - p)
987
0
        i = e - p;
988
0
      memcpy(vep->tag + vep->tag_i, p, i);
989
0
      vm = vep_match(vep, vep->tag,
990
0
          vep->tag + vep->tag_i + i);
991
0
      Debug("MB (%.*s) tag_i %d i %d = vm %p match %s\n",
992
0
          vep->tag_i + i, vep->tag,
993
0
          vep->tag_i,
994
0
          i,
995
0
          vm,
996
0
          vm ? vm->match : "(nil)");
997
998
0
      if (vm == NULL) {
999
0
        vep->tag_i += i;
1000
0
        p += i;
1001
0
        assert(p == e);
1002
0
      } else {
1003
0
        vep->match_hit = vm;
1004
0
        vep->state = *vm->state;
1005
0
        if (vm->match != NULL) {
1006
0
          i = strlen(vm->match);
1007
0
          if (i > vep->tag_i)
1008
0
            p += i - vep->tag_i;
1009
0
        }
1010
0
        vep->match = NULL;
1011
0
        vep->tag_i = 0;
1012
0
      }
1013
1.85k
    } else if (vep->state == VEP_UNTIL) {
1014
      /*
1015
       * Skip until we see magic string
1016
       */
1017
12.5M
      while (p < e) {
1018
12.5M
        if (*p++ != *vep->until_p++) {
1019
12.4M
          vep->until_p = vep->until;
1020
12.4M
        } else if (*vep->until_p == '\0') {
1021
1.79k
          vep->state = vep->until_s;
1022
1.79k
          break;
1023
1.79k
        }
1024
12.5M
      }
1025
1.85k
      if (p == e && !vep->remove)
1026
67
        vep_mark_verbatim(vep, p);
1027
1.85k
    } else {
1028
0
      Debug("*** Unknown state %s\n", vep->state);
1029
0
      WRONG("WRONG ESI PARSER STATE");
1030
0
    }
1031
624k
  }
1032
  /*
1033
   * We must always mark up the storage we got, try to do so
1034
   * in the most efficient way, in particular with respect to
1035
   * minimizing and limiting use of pending.
1036
   */
1037
1.52k
  if (p == vep->ver_p)
1038
944
    ;
1039
578
  else if (vep->in_esi_tag)
1040
361
    vep_mark_skip(vep, p);
1041
217
  else if (vep->remove)
1042
0
    vep_mark_skip(vep, p);
1043
217
  else
1044
217
    vep_mark_pending(vep, p);
1045
1.52k
}
1046
1047
/*---------------------------------------------------------------------
1048
 */
1049
1050
static ssize_t v_matchproto_(vep_callback_t)
1051
vep_default_cb(struct vfp_ctx *vc, void *priv, ssize_t l, enum vgz_flag flg)
1052
583k
{
1053
583k
  ssize_t *s;
1054
1055
583k
  CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC);
1056
583k
  AN(priv);
1057
583k
  s = priv;
1058
583k
  *s += l;
1059
583k
  (void)flg;
1060
583k
  return (*s);
1061
583k
}
1062
1063
/*---------------------------------------------------------------------
1064
 */
1065
1066
struct vep_state *
1067
VEP_Init(struct vfp_ctx *vc, const struct http *req, vep_callback_t *cb,
1068
    void *cb_priv)
1069
1.52k
{
1070
1.52k
  struct vep_state *vep;
1071
1072
1.52k
  CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC);
1073
1.52k
  CHECK_OBJ_NOTNULL(req, HTTP_MAGIC);
1074
1.52k
  vep = WS_Alloc(vc->resp->ws, sizeof *vep);
1075
1.52k
  if (vep == NULL) {
1076
0
    VSLb(vc->wrk->vsl, SLT_VCL_Error,
1077
0
         "VEP_Init() workspace overflow");
1078
0
    return (NULL);
1079
0
  }
1080
1081
1.52k
  INIT_OBJ(vep, VEP_MAGIC);
1082
1.52k
  vep->url = req->hd[HTTP_HDR_URL].b;
1083
1.52k
  vep->vc = vc;
1084
1.52k
  vep->vsb = VSB_new_auto();
1085
1.52k
  AN(vep->vsb);
1086
1087
1.52k
  if (cb != NULL) {
1088
0
    vep->dogzip = 1;
1089
    /* XXX */
1090
0
    VSB_printf(vep->vsb, "%c", VEC_GZ);
1091
0
    vep->cb = cb;
1092
0
    vep->cb_priv = cb_priv;
1093
1.52k
  } else {
1094
1.52k
    vep->cb = vep_default_cb;
1095
1.52k
    vep->cb_priv = &vep->cb_x;
1096
1.52k
  }
1097
1098
1.52k
  vep->state = VEP_START;
1099
1.52k
  vep->crc = crc32(0L, Z_NULL, 0);
1100
1.52k
  vep->crcp = crc32(0L, Z_NULL, 0);
1101
1102
1.52k
  vep->startup = 1;
1103
1.52k
  return (vep);
1104
1.52k
}
1105
1106
/*---------------------------------------------------------------------
1107
 */
1108
1109
struct vsb *
1110
VEP_Finish(struct vep_state *vep)
1111
1.52k
{
1112
1.52k
  ssize_t l, lcb;
1113
1114
1.52k
  CHECK_OBJ_NOTNULL(vep, VEP_MAGIC);
1115
1116
1.52k
  if (vep->include_src)
1117
50
    VSB_destroy(&vep->include_src);
1118
1.52k
  if (vep->attr_vsb)
1119
42
    VSB_destroy(&vep->attr_vsb);
1120
1121
1.52k
  if (vep->state != VEP_START &&
1122
1.51k
      vep->state != VEP_BOM &&
1123
1.51k
      vep->state != VEP_TESTXML &&
1124
1.50k
      vep->state != VEP_NOTXML &&
1125
1.44k
      vep->state != VEP_NEXTTAG) {
1126
556
    vep_error(vep, "VEP ended inside a tag");
1127
556
  }
1128
1129
1.52k
  if (vep->o_pending)
1130
217
    vep_mark_common(vep, vep->ver_p, vep->last_mark);
1131
1.52k
  if (vep->o_wait > 0) {
1132
1.49k
    lcb = vep->cb(vep->vc, vep->cb_priv, 0, VGZ_ALIGN);
1133
1.49k
    vep_emit_common(vep, lcb - vep->o_last, vep->last_mark);
1134
1.49k
  }
1135
  // NB: We don't account for PAD+SUM+LEN in gzipped objects
1136
1.52k
  (void)vep->cb(vep->vc, vep->cb_priv, 0, VGZ_FINISH);
1137
1138
1.52k
  AZ(VSB_finish(vep->vsb));
1139
1.52k
  l = VSB_len(vep->vsb);
1140
1.52k
  if (vep->esi_found && l > 0)
1141
1.03k
    return (vep->vsb);
1142
487
  VSB_destroy(&vep->vsb);
1143
  return (NULL);
1144
1.52k
}
1145
1146
#if 0
1147
1148
digraph xml {
1149
  rankdir="LR"
1150
  size="7,10"
1151
#################################################################
1152
# SECTION A
1153
#
1154
1155
START   [shape=ellipse]
1156
TESTXML   [shape=ellipse]
1157
NOTXML    [shape=ellipse]
1158
NEXTTAGa  [shape=hexagon, label="NEXTTAG"]
1159
STARTTAGa [shape=hexagon, label="STARTTAG"]
1160
START   -> TESTXML
1161
START   -> NEXTTAGa [style=dotted, label="syntax:1"]
1162
TESTXML   -> TESTXML  [label="lws"]
1163
TESTXML   -> NOTXML
1164
TESTXML   -> STARTTAGa  [label="'<'"]
1165
1166
#################################################################
1167
# SECTION B
1168
1169
NOTMYTAG  [shape=ellipse]
1170
NEXTTAG   [shape=ellipse]
1171
NOTMYTAG  -> NEXTTAG  [style=dotted, label="syntax:2"]
1172
STARTTAGb [shape=hexagon, label="STARTTAG"]
1173
NOTMYTAG  -> NEXTTAG  [label="'>'"]
1174
NOTMYTAG  -> NOTMYTAG [label="*"]
1175
NEXTTAG   -> NEXTTAG  [label="'-->'"]
1176
NEXTTAG   -> NEXTTAG  [label="*"]
1177
NEXTTAG   -> STARTTAGb  [label="'<'"]
1178
1179
#################################################################
1180
# SECTION C
1181
1182
STARTTAG  [shape=ellipse]
1183
COMMENT   [shape=ellipse]
1184
CDATA   [shape=ellipse]
1185
ESITAG    [shape=ellipse]
1186
ESIETAG   [shape=ellipse]
1187
ESIINCLUDE  [shape=ellipse]
1188
ESIREMOVE [shape=ellipse]
1189
ESICOMMENT  [shape=ellipse]
1190
ESIBOGON  [shape=ellipse]
1191
INTAGc    [shape=hexagon, label="INTAG"]
1192
NOTMYTAGc [shape=hexagon, label="NOTMYTAG"]
1193
NEXTTAGc  [shape=hexagon, label="NEXTTAG"]
1194
TAGERRORc [shape=hexagon, label="TAGERROR"]
1195
C1    [shape=circle,label=""]
1196
STARTTAG  -> COMMENT  [label="'<!--'"]
1197
STARTTAG  -> ESITAG [label="'<esi'"]
1198
STARTTAG  -> CDATA  [label="'<![CDATA['"]
1199
STARTTAG  -> NOTMYTAGc  [label="'*'"]
1200
COMMENT   -> NEXTTAGc [label="'esi'"]
1201
COMMENT   -> C1   [label="*"]
1202
C1    -> C1   [label="*"]
1203
C1    -> NEXTTAGc [label="-->"]
1204
CDATA   -> CDATA  [label="*"]
1205
CDATA   -> NEXTTAGc [label="]]>"]
1206
ESITAG    -> ESIINCLUDE [label="'include'"]
1207
ESITAG    -> ESIREMOVE  [label="'remove'"]
1208
ESITAG    -> ESICOMMENT [label="'comment'"]
1209
ESITAG    -> ESIBOGON [label="*"]
1210
ESICOMMENT  -> INTAGc
1211
ESICOMMENT  -> TAGERRORc
1212
ESICOMMENT  -> TAGERRORc  [style=dotted, label="nested\nin\nremove"]
1213
ESIREMOVE -> INTAGc
1214
ESIREMOVE -> TAGERRORc
1215
ESIINCLUDE  -> INTAGc
1216
ESIINCLUDE  -> TAGERRORc
1217
ESIINCLUDE  -> TAGERRORc  [style=dotted, label="nested\nin\nremove"]
1218
ESIBOGON  -> TAGERRORc
1219
1220
#################################################################
1221
# SECTION D
1222
1223
INTAG   [shape=ellipse]
1224
TAGERROR  [shape=ellipse]
1225
NEXTTAGd  [shape=hexagon, label="NEXTTAG"]
1226
ATTRd   [shape=hexagon, label="ATTR"]
1227
D1    [shape=circle, label=""]
1228
D2    [shape=circle, label=""]
1229
INTAG   -> D1   [label="lws"]
1230
D1    -> D2   [label="/"]
1231
INTAG   -> D2   [label="/"]
1232
INTAG   -> NEXTTAGd [label=">"]
1233
D1    -> NEXTTAGd [label=">"]
1234
D2    -> NEXTTAGd [label=">"]
1235
D1    -> ATTRd  [label="XMLstartchar"]
1236
D1    -> TAGERROR [label="*"]
1237
D2    -> TAGERROR [label="*"]
1238
TAGERROR  -> TAGERROR [label="*"]
1239
TAGERROR  -> NEXTTAGd [label="'>'"]
1240
1241
#################################################################
1242
# SECTION E
1243
1244
ATTR    [shape=ellipse]
1245
SKIPATTR  [shape=ellipse]
1246
ATTRGETVAL  [shape=ellipse]
1247
ATTRDELIM [shape=ellipse]
1248
ATTRVAL   [shape=ellipse]
1249
TAGERRORe [shape=hexagon, label="TAGERROR"]
1250
INTAGe    [shape=hexagon, label="INTAG"]
1251
ATTR    -> SKIPATTR [label="*"]
1252
ATTR    -> ATTRGETVAL [label="wanted attr"]
1253
SKIPATTR  -> SKIPATTR [label="XMLname"]
1254
SKIPATTR  -> ATTRDELIM  [label="'='"]
1255
SKIPATTR  -> INTAGe [label="'>' | '/' | lws"]
SKIPATTR  -> TAGERRORe  [label="*"]
1256
ATTRGETVAL  -> ATTRDELIM
1257
ATTRDELIM -> ATTRVAL  [label="\""]
1258
ATTRDELIM -> ATTRVAL  [label="\'"]
1259
ATTRDELIM -> ATTRVAL  [label="*"]
1260
ATTRDELIM -> TAGERRORe  [label="lws"]
1261
ATTRVAL   -> TAGERRORe  [label="'>'"]
1262
ATTRVAL   -> INTAGe [label="delim"]
1263
ATTRVAL   -> ATTRVAL  [label="*"]
1264
1265
}
1266
1267
#endif