Coverage Report

Created: 2026-06-13 06:23

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/vinyl-cache/bin/vinyld/cache/cache_esi_parse.c
Line
Count
Source
1
/*-
2
 * Copyright (c) 2011 Varnish Software AS
3
 * All rights reserved.
4
 *
5
 * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
6
 *
7
 * SPDX-License-Identifier: BSD-2-Clause
8
 *
9
 * Redistribution and use in source and binary forms, with or without
10
 * modification, are permitted provided that the following conditions
11
 * are met:
12
 * 1. Redistributions of source code must retain the above copyright
13
 *    notice, this list of conditions and the following disclaimer.
14
 * 2. Redistributions in binary form must reproduce the above copyright
15
 *    notice, this list of conditions and the following disclaimer in the
16
 *    documentation and/or other materials provided with the distribution.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
22
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28
 * SUCH DAMAGE.
29
 *
30
 * VEP Vinyl Esi Parsing
31
 */
32
33
#include "config.h"
34
35
#include "cache_vinyld.h"
36
#include "cache_filter.h"
37
38
#include "cache_vgz.h"
39
#include "cache_esi.h"
40
#include "vct.h"
41
#include "vend.h"
42
#include "vgz.h"
43
44
//#define Debug(fmt, ...) printf(fmt, __VA_ARGS__)
45
#define Debug(fmt, ...) /**/
46
47
struct vep_state;
48
49
enum dowhat {DO_ATTR, DO_TAG};
50
typedef void dostuff_f(struct vep_state *, enum dowhat);
51
52
struct vep_match {
53
  const char  *match;
54
  const char  * const *state;
55
};
56
57
enum vep_mark { VERBATIM = 0, SKIP };
58
59
struct vep_state {
60
  unsigned    magic;
61
#define VEP_MAGIC   0x55cb9b82
62
  // flags from bereq
63
  struct vep_flags  flags;
64
65
  struct vsb    *vsb;
66
67
  const char    *url;
68
  struct vfp_ctx    *vc;
69
  int     dogzip;
70
  vep_callback_t    *cb;
71
  void      *cb_priv;
72
73
  /* Internal Counter for default call-back function */
74
  ssize_t     cb_x;
75
76
  /* parser state */
77
  const char    *state;
78
  unsigned    startup;
79
  unsigned    esi_found;
80
81
  unsigned    endtag;
82
  unsigned    emptytag;
83
  unsigned    canattr;
84
85
  unsigned    remove;
86
87
  ssize_t     o_wait;
88
  ssize_t     o_pending;
89
  ssize_t     o_total;
90
  uint32_t    crc;
91
  ssize_t     o_crc;
92
  uint32_t    crcp;
93
  ssize_t     o_last;
94
95
  const char    *hack_p;
96
  const char    *ver_p;
97
98
  const char    *until;
99
  const char    *until_p;
100
  const char    *until_s;
101
102
  int     in_esi_tag;
103
104
  const char    *esicmt;
105
  const char    *esicmt_p;
106
107
  struct vep_match  *attr;
108
  struct vsb    *attr_vsb;
109
  int     attr_delim;
110
111
  struct vep_match  *match;
112
  struct vep_match  *match_hit;
113
114
  char      tag[8];
115
  int     tag_i;
116
117
  dostuff_f   *dostuff;
118
119
  struct vsb    *include_src;
120
  unsigned    include_continue;
121
122
  unsigned    nm_skip;
123
  unsigned    nm_verbatim;
124
  unsigned    nm_pending;
125
  enum vep_mark   last_mark;
126
};
127
128
/*---------------------------------------------------------------------*/
129
130
static const char * const VEP_START =   "[Start]";
131
static const char * const VEP_BOM =   "[BOM]";
132
static const char * const VEP_TESTXML =   "[TestXml]";
133
static const char * const VEP_NOTXML =    "[NotXml]";
134
135
static const char * const VEP_NEXTTAG =   "[NxtTag]";
136
static const char * const VEP_NOTMYTAG =  "[NotMyTag]";
137
138
static const char * const VEP_STARTTAG =  "[StartTag]";
139
static const char * const VEP_COMMENTESI =  "[CommentESI]";
140
static const char * const VEP_COMMENT =   "[Comment]";
141
static const char * const VEP_CDATA =   "[CDATA]";
142
static const char * const VEP_ESITAG =    "[ESITag]";
143
static const char * const VEP_ESIENDTAG = "[/ESITag]";
144
145
static const char * const VEP_ESIREMOVE = "[ESI:Remove]";
146
static const char * const VEP_ESIINCLUDE =  "[ESI:Include]";
147
static const char * const VEP_ESICOMMENT =  "[ESI:Comment]";
148
static const char * const VEP_ESIBOGON =  "[ESI:Bogon]";
149
150
static const char * const VEP_INTAG =   "[InTag]";
151
static const char * const VEP_TAGERROR =  "[TagError]";
152
153
static const char * const VEP_ATTR =    "[Attribute]";
154
static const char * const VEP_SKIPATTR =  "[SkipAttribute]";
155
static const char * const VEP_ATTRDELIM = "[AttrDelim]";
156
static const char * const VEP_ATTRGETVAL =  "[AttrGetValue]";
157
static const char * const VEP_ATTRVAL =   "[AttrValue]";
158
159
static const char * const VEP_UNTIL =   "[Until]";
160
static const char * const VEP_MATCHBUF =  "[MatchBuf]";
161
static const char * const VEP_MATCH =   "[Match]";
162
163
/*---------------------------------------------------------------------*/
164
165
static struct vep_match vep_match_starttag[] = {
166
  { "!--esi", &VEP_COMMENTESI },
167
  { "!---->", &VEP_NEXTTAG },
168
  { "!--",  &VEP_COMMENT },
169
  { "/esi:",  &VEP_ESIENDTAG },
170
  { "esi:", &VEP_ESITAG },
171
  { "![CDATA[", &VEP_CDATA },
172
  { NULL,   &VEP_NOTMYTAG }
173
};
174
175
/*---------------------------------------------------------------------*/
176
177
static struct vep_match vep_match_esi[] = {
178
  { "include",  &VEP_ESIINCLUDE },
179
  { "remove", &VEP_ESIREMOVE },
180
  { "comment",  &VEP_ESICOMMENT },
181
  { NULL,   &VEP_ESIBOGON }
182
};
183
184
/*---------------------------------------------------------------------*/
185
186
static struct vep_match vep_match_attr_include[] = {
187
  { "src=", &VEP_ATTRGETVAL },
188
  { "onerror=", &VEP_ATTRGETVAL },
189
  { NULL,   &VEP_SKIPATTR }
190
};
191
192
/*---------------------------------------------------------------------*/
193
194
static struct vep_match vep_match_bom[] = {
195
  { "\xeb\xbb\xbf", &VEP_START },
196
  { NULL,     &VEP_BOM }
197
};
198
199
/*--------------------------------------------------------------------
200
 * Report a parsing error
201
 */
202
203
static void
204
vep_error(const struct vep_state *vep, const char *p)
205
31.5k
{
206
31.5k
  VSC_C_main->esi_errors++;
207
31.5k
  VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror, "ERR: after %zd %s",
208
31.5k
       vep->o_last, p);
209
31.5k
}
210
211
/*--------------------------------------------------------------------
212
 * Report a parsing warning
213
 */
214
215
static void
216
vep_warn(const struct vep_state *vep, const char *p)
217
9.75k
{
218
9.75k
  VSC_C_main->esi_warnings++;
219
9.75k
  VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror, "WARN: after %zd %s",
220
9.75k
       vep->o_last, p);
221
9.75k
}
222
223
/*---------------------------------------------------------------------
224
 * return match or NULL if more input needed.
225
 */
226
227
static struct vep_match *
228
vep_match(const struct vep_state *vep, const char *b, const char *e)
229
163k
{
230
163k
  struct vep_match *vm;
231
163k
  const char *q, *r;
232
233
163k
  AN(vep->match);
234
524k
  for (vm = vep->match; vm->match != NULL; vm++) {
235
490k
    assert(vstrlen(vm->match) <= sizeof (vep->tag));
236
490k
    r = b;
237
1.24M
    for (q = vm->match; *q != '\0' && r < e; q++, r++)
238
1.11M
      if (*q != *r)
239
361k
        break;
240
490k
    if (*q == '\0')
241
129k
      break;
242
361k
    if (r == e)
243
40
      return (NULL);
244
361k
  }
245
163k
  return (vm);
246
163k
}
247
248
/*---------------------------------------------------------------------
249
 * Append skip/verbatim length records (marker byte + length) to vep->vsb
250
 */
251
252
static void
253
vep_emit_len(const struct vep_state *vep, ssize_t l, int m8, int m16, int m64)
254
177k
{
255
177k
  uint8_t buf[9];
256
257
177k
  assert(l > 0);
258
177k
  if (l < 256) {
259
176k
    buf[0] = (uint8_t)m8;
260
176k
    buf[1] = (uint8_t)l;
261
176k
    assert((ssize_t)buf[1] == l);
262
176k
    VSB_bcat(vep->vsb, buf, 2);
263
176k
  } else if (l < 65536) {
264
1.34k
    buf[0] = (uint8_t)m16;
265
1.34k
    vbe16enc(buf + 1, (uint16_t)l);
266
1.34k
    assert((ssize_t)vbe16dec(buf + 1) == l);
267
1.34k
    VSB_bcat(vep->vsb, buf, 3);
268
1.34k
  } else {
269
158
    buf[0] = (uint8_t)m64;
270
158
    vbe64enc(buf + 1, l);
271
158
    assert((ssize_t)vbe64dec(buf + 1) == l);
272
158
    VSB_bcat(vep->vsb, buf, 9);
273
158
  }
274
177k
}
275
276
static void
277
vep_emit_skip(const struct vep_state *vep, ssize_t l)
278
99.8k
{
279
280
99.8k
  vep_emit_len(vep, l, VEC_S1, VEC_S2, VEC_S8);
281
99.8k
}
282
283
static void
284
vep_emit_verbatim(const struct vep_state *vep, ssize_t l, ssize_t l_crc)
285
77.9k
{
286
77.9k
  uint8_t buf[4];
287
288
77.9k
  vep_emit_len(vep, l, VEC_V1, VEC_V2, VEC_V8);
289
77.9k
  if (vep->dogzip) {
290
0
    vep_emit_len(vep, l_crc, VEC_C1, VEC_C2, VEC_C8);
291
0
    vbe32enc(buf, vep->crc);
292
0
    VSB_bcat(vep->vsb, buf, sizeof buf);
293
0
  }
294
77.9k
}
295
296
static void
297
vep_emit_common(struct vep_state *vep, ssize_t l, enum vep_mark mark)
298
179k
{
299
300
179k
  assert(l >= 0);
301
179k
  if (l == 0)
302
1.62k
    return;
303
177k
  assert(mark == SKIP || mark == VERBATIM);
304
177k
  if (mark == SKIP)
305
99.8k
    vep_emit_skip(vep, l);
306
77.9k
  else
307
77.9k
    vep_emit_verbatim(vep, l, vep->o_crc);
308
309
177k
  vep->crc = crc32(0L, Z_NULL, 0);
310
177k
  vep->o_crc = 0;
311
177k
  vep->o_total += l;
312
177k
}
313
314
/*---------------------------------------------------------------------
315
 * Advance vep->ver_p to p, marking the bytes in between as verbatim,
 * skip or pending
316
 */
317
318
static void
319
vep_mark_common(struct vep_state *vep, const char *p, enum vep_mark mark)
320
284k
{
321
284k
  ssize_t l, lcb;
322
323
284k
  assert(mark == SKIP || mark == VERBATIM);
324
325
  /* The NO-OP case, no data, no pending data & no change of mode */
326
284k
  if (vep->last_mark == mark && p == vep->ver_p && vep->o_pending == 0)
327
0
    return;
328
329
  /*
330
   * If we changed mode, emit whatever the opposite mode
331
   * assembled before the pending bytes.
332
   */
333
334
284k
  if (vep->last_mark != mark && (vep->o_wait > 0 || vep->startup)) {
335
177k
    lcb = vep->cb(vep->vc, vep->cb_priv, 0,
336
177k
        mark == VERBATIM ? VGZ_RESET : VGZ_ALIGN);
337
177k
    vep_emit_common(vep, lcb - vep->o_last, vep->last_mark);
338
177k
    vep->o_last = lcb;
339
177k
    vep->o_wait = 0;
340
177k
  }
341
342
  /* Transfer pending bytes CRC into active mode CRC */
343
284k
  if (vep->o_pending) {
344
226
    (void)vep->cb(vep->vc, vep->cb_priv, vep->o_pending,
345
226
         VGZ_NORMAL);
346
226
    if (vep->o_crc == 0) {
347
64
      vep->crc = vep->crcp;
348
64
      vep->o_crc = vep->o_pending;
349
162
    } else {
350
162
      vep->crc = crc32_combine(vep->crc,
351
162
          vep->crcp, vep->o_pending);
352
162
      vep->o_crc += vep->o_pending;
353
162
    }
354
226
    vep->crcp = crc32(0L, Z_NULL, 0);
355
226
    vep->o_wait += vep->o_pending;
356
226
    vep->o_pending = 0;
357
226
  }
358
359
  /* Process this bit of input */
360
284k
  AN(vep->ver_p);
361
284k
  l = p - vep->ver_p;
362
284k
  assert(l >= 0);
363
284k
  vep->crc = crc32(vep->crc, (const void*)vep->ver_p, l);
364
284k
  vep->o_crc += l;
365
284k
  vep->ver_p = p;
366
367
284k
  vep->o_wait += l;
368
284k
  vep->last_mark = mark;
369
284k
  (void)vep->cb(vep->vc, vep->cb_priv, l, VGZ_NORMAL);
370
284k
}
371
372
static void
373
vep_mark_verbatim(struct vep_state *vep, const char *p)
374
128k
{
375
376
128k
  vep_mark_common(vep, p, VERBATIM);
377
128k
  vep->nm_verbatim++;
378
128k
}
379
380
static void
381
vep_mark_skip(struct vep_state *vep, const char *p)
382
154k
{
383
384
154k
  vep_mark_common(vep, p, SKIP);
385
154k
  vep->nm_skip++;
386
154k
}
387
388
static void
389
vep_mark_pending(struct vep_state *vep, const char *p)
390
226
{
391
226
  ssize_t l;
392
393
226
  AN(vep->ver_p);
394
226
  l = p - vep->ver_p;
395
226
  assert(l > 0);
396
226
  vep->crcp = crc32(vep->crcp, (const void *)vep->ver_p, l);
397
226
  vep->ver_p = p;
398
399
226
  vep->o_pending += l;
400
226
  vep->nm_pending++;
401
226
}
402
403
/*---------------------------------------------------------------------
404
 */
405
406
static void v_matchproto_()
407
vep_do_comment(struct vep_state *vep, enum dowhat what)
408
1.79k
{
409
1.79k
  Debug("DO_COMMENT(%d)\n", what);
410
1.79k
  assert(what == DO_TAG);
411
1.79k
  if (!vep->emptytag)
412
1.49k
    vep_error(vep, "ESI 1.0 <esi:comment> needs final '/'");
413
1.79k
}
414
415
/*---------------------------------------------------------------------
416
 */
417
418
static void v_matchproto_()
419
vep_do_remove(struct vep_state *vep, enum dowhat what)
420
6.54k
{
421
6.54k
  Debug("DO_REMOVE(%d, end %d empty %d remove %d)\n",
422
6.54k
      what, vep->endtag, vep->emptytag, vep->remove);
423
6.54k
  assert(what == DO_TAG);
424
6.54k
  if (vep->emptytag)
425
3.20k
    vep_error(vep, "ESI 1.0 <esi:remove/> not legal");
426
3.34k
  else if (vep->remove && !vep->endtag)
427
1.76k
    vep_error(vep, "ESI 1.0 <esi:remove> already open");
428
1.57k
  else if (!vep->remove && vep->endtag)
429
289
    vep_error(vep, "ESI 1.0 <esi:remove> not open");
430
1.28k
  else
431
1.28k
    vep->remove = !vep->endtag;
432
6.54k
}
433
434
/*---------------------------------------------------------------------
435
 */
436
437
static void
438
include_attr_src(struct vep_state *vep)
439
10.6k
{
440
10.6k
  const char *p;
441
442
10.6k
  if (vep->include_src != NULL) {
443
912
    vep_error(vep,
444
912
        "ESI 1.0 <esi:include> "
445
912
        "has multiple src= attributes");
446
912
    vep->state = VEP_TAGERROR;
447
912
    VSB_destroy(&vep->attr_vsb);
448
912
    VSB_destroy(&vep->include_src);
449
912
    return;
450
912
  }
451
12.3M
  for (p = VSB_data(vep->attr_vsb); *p != '\0'; p++)
452
12.3M
    if (vct_islws(*p))
453
831
      break;
454
9.69k
  if (*p != '\0') {
455
831
    vep_error(vep,
456
831
        "ESI 1.0 <esi:include> "
457
831
        "has whitespace in src= attribute");
458
831
    vep->state = VEP_TAGERROR;
459
831
    VSB_destroy(&vep->attr_vsb);
460
831
    if (vep->include_src != NULL)
461
0
      VSB_destroy(&vep->include_src);
462
831
    return;
463
831
  }
464
8.86k
  vep->include_src = vep->attr_vsb;
465
8.86k
  vep->attr_vsb = NULL;
466
8.86k
}
467
468
static void
469
include_attr_onerror(struct vep_state *vep)
470
0
{
471
472
0
  vep->include_continue = !vstrcmp("continue", VSB_data(vep->attr_vsb));
473
0
  VSB_destroy(&vep->attr_vsb);
474
0
}
475
476
static void v_matchproto_()
477
vep_do_include(struct vep_state *vep, enum dowhat what)
478
19.0k
{
479
19.0k
  const char *p, *q, *h;
480
19.0k
  ssize_t l;
481
19.0k
  char incl;
482
483
19.0k
  Debug("DO_INCLUDE(%d)\n", what);
484
19.0k
  if (what == DO_ATTR) {
485
10.6k
    Debug("ATTR (%s) (%s)\n", vep->match_hit->match,
486
10.6k
      VSB_data(vep->attr_vsb));
487
10.6k
    if (!vstrcmp("src=", vep->match_hit->match)) {
488
10.6k
      include_attr_src(vep);
489
10.6k
      return;
490
10.6k
    }
491
0
    if (!vstrcmp("onerror=", vep->match_hit->match)) {
492
0
      include_attr_onerror(vep);
493
0
      return;
494
0
    }
495
0
    WRONG("Unhandled <esi:include> attribute");
496
0
  }
497
8.46k
  assert(what == DO_TAG);
498
8.46k
  if (!vep->emptytag)
499
8.24k
    vep_warn(vep, "ESI 1.0 <esi:include> lacks final '/'");
500
8.46k
  if (vep->include_src == NULL) {
501
553
    vep_error(vep, "ESI 1.0 <esi:include> lacks src attr");
502
553
    return;
503
553
  }
504
505
  /*
506
   * Strictly speaking, we ought to spit out any piled up skip before
507
   * emitting the VEC for the include, but objectively that makes no
508
   * difference and robs us of a chance to collapse another skip into
509
   * this one, so we don't do that.
510
   * However, we cannot tolerate any verbatim stuff piling up.
511
   * The mark_skip() before calling dostuff should have taken
512
   * care of that.  Make sure.
513
   */
514
7.90k
  assert(vep->o_wait == 0 || vep->last_mark == SKIP);
515
  /* XXX: what if it contains NUL bytes ?? */
516
7.90k
  p = VSB_data(vep->include_src);
517
7.90k
  l = VSB_len(vep->include_src);
518
7.90k
  h = 0;
519
520
7.90k
  incl = vep->include_continue ? VEC_IC : VEC_IA;
521
522
7.90k
  if (l > 7 && !memcmp(p, "http://", 7)) {
523
1.31k
    h = p + 7;
524
1.31k
    p = strchr(h, '/');
525
1.31k
    if (p == NULL) {
526
363
      vep_error(vep,
527
363
          "ESI 1.0 <esi:include> invalid src= URL");
528
363
      vep->state = VEP_TAGERROR;
529
363
      AZ(vep->attr_vsb);
530
363
      VSB_destroy(&vep->include_src);
531
363
      return;
532
363
    }
533
956
    Debug("HOST <%.*s> PATH <%s>\n", (int)(p-h),h, p);
534
956
    VSB_printf(vep->vsb, "%c", incl);
535
956
    VSB_printf(vep->vsb, "Host: %.*s%c", (int)(p-h), h, 0);
536
6.59k
  } else if (l > 8 && !memcmp(p, "https://", 8)) {
537
1.50k
    if (!vep->flags.esi_ignore_https) {
538
416
      vep_warn(vep,
539
416
          "ESI 1.0 <esi:include> with https:// ignored");
540
416
      vep->state = VEP_TAGERROR;
541
416
      AZ(vep->attr_vsb);
542
416
      VSB_destroy(&vep->include_src);
543
416
      return;
544
416
    }
545
1.09k
    vep_warn(vep,
546
1.09k
        "ESI 1.0 <esi:include> https:// treated as http://");
547
1.09k
    h = p + 8;
548
1.09k
    p = strchr(h, '/');
549
1.09k
    if (p == NULL) {
550
395
      vep_error(vep,
551
395
          "ESI 1.0 <esi:include> invalid src= URL");
552
395
      vep->state = VEP_TAGERROR;
553
395
      AZ(vep->attr_vsb);
554
395
      VSB_destroy(&vep->include_src);
555
395
      return;
556
395
    }
557
696
    VSB_printf(vep->vsb, "%c", incl);
558
696
    VSB_printf(vep->vsb, "Host: %.*s%c", (int)(p-h), h, 0);
559
5.08k
  } else if (*p == '/') {
560
608
    VSB_printf(vep->vsb, "%c", incl);
561
608
    VSB_printf(vep->vsb, "%c", 0);
562
4.47k
  } else {
563
4.47k
    VSB_printf(vep->vsb, "%c", incl);
564
4.47k
    VSB_printf(vep->vsb, "%c", 0);
565
    /* Look for the last / before a '?' */
566
4.47k
    h = NULL;
567
8.95k
    for (q = vep->url; *q && *q != '?'; q++)
568
4.47k
      if (*q == '/')
569
4.47k
        h = q;
570
4.47k
    if (h == NULL)
571
0
      h = q + 1;
572
573
4.47k
    Debug("INCL:: [%.*s]/[%s]\n",
574
4.47k
        (int)(h - vep->url), vep->url, p);
575
4.47k
    VSB_printf(vep->vsb, "%.*s/", (int)(h - vep->url), vep->url);
576
4.47k
  }
577
6.73k
  l -= (p - VSB_data(vep->include_src));
578
4.42M
  for (q = p; *q != '\0'; ) {
579
4.42M
    if (*q == '&') {
580
47.4k
#define R(w,f,r)              \
581
210k
      if (q + w <= p + l && !memcmp(q, f, w)) { \
582
11.0k
        VSB_printf(vep->vsb, "%c", r);  \
583
11.0k
        q += w;       \
584
11.0k
        continue;     \
585
11.0k
      }
586
47.4k
      R(6, "&apos;", '\'');
587
44.0k
      R(6, "&quot;", '"');
588
41.3k
      R(4, "&lt;", '<');
589
40.4k
      R(4, "&gt;", '>');
590
36.9k
      R(5, "&amp;", '&');
591
36.4k
    }
592
4.40M
    VSB_printf(vep->vsb, "%c", *q++);
593
4.40M
  }
594
6.73k
#undef R
595
6.73k
  VSB_printf(vep->vsb, "%c", 0);
596
6.73k
  VSB_destroy(&vep->include_src);
597
6.73k
  vep->include_continue = 0;
598
6.73k
}
599
600
/*---------------------------------------------------------------------
601
 * Lex/Parse object for ESI instructions
602
 *
603
 * This function is called with the input object piecemeal so do not
604
 * assume that we have more than one char available at a time, but
605
 * optimize for getting huge chunks.
606
 *
607
 * NB: At the bottom of this source-file, there is a dot-diagram matching
608
 * NB: the state-machine.  Please maintain it along with the code.
609
 */
610
611
void
612
VEP_Parse(struct vep_state *vep, const char *p, size_t l)
613
1.62k
{
614
1.62k
  const char *e;
615
1.62k
  struct vep_match *vm;
616
1.62k
  int i;
617
618
1.62k
  CHECK_OBJ_NOTNULL(vep, VEP_MAGIC);
619
1.62k
  assert(l > 0);
620
621
1.62k
  if (vep->startup) {
622
    /*
623
     * We must force the GZIP header out as a SKIP string,
624
     * otherwise an object starting with <esi:include would
625
     * have its GZIP header appear after the included object
626
     * (e000026.vtc)
627
     */
628
1.62k
    vep->ver_p = "";
629
1.62k
    vep->last_mark = SKIP;
630
1.62k
    vep_mark_common(vep, vep->ver_p, VERBATIM);
631
1.62k
    vep->startup = 0;
632
1.62k
    AZ(vep->hack_p);
633
1.62k
    vep->hack_p = p;
634
1.62k
  }
635
636
1.62k
  vep->ver_p = p;
637
638
1.62k
  e = p + l;
639
640
673k
  while (p < e) {
641
672k
    AN(vep->state);
642
672k
    Debug("EP %s %d (%.*s) [%.*s]\n",
643
672k
        vep->state,
644
672k
        vep->remove,
645
672k
        vep->tag_i, vep->tag,
646
672k
        (e - p) > 10 ? 10 : (int)(e-p), p);
647
672k
    assert(p >= vep->ver_p);
648
649
    /******************************************************
650
     * SECTION A
651
     */
652
653
672k
    if (vep->state == VEP_START) {
654
3.42k
      if (vep->flags.esi_remove_bom &&
655
3.31k
          *p == (char)0xeb) {
656
1.83k
        vep->match = vep_match_bom;
657
1.83k
        vep->state = VEP_MATCH;
658
1.83k
      } else
659
1.59k
        vep->state = VEP_BOM;
660
668k
    } else if (vep->state == VEP_BOM) {
661
1.60k
      vep_mark_skip(vep, p);
662
1.60k
      if (vep->flags.esi_disable_xml_check)
663
1.50k
        vep->state = VEP_NEXTTAG;
664
97
      else
665
97
        vep->state = VEP_TESTXML;
666
667k
    } else if (vep->state == VEP_TESTXML) {
667
      /*
668
       * If the first non-whitespace char is different
669
       * from '<' we assume this is not XML.
670
       */
671
323
      while (p < e && vct_islws(*p))
672
226
        p++;
673
97
      vep_mark_verbatim(vep, p);
674
97
      if (p < e && *p == '<') {
675
12
        p++;
676
12
        vep->state = VEP_STARTTAG;
677
85
      } else if (p < e && *p == (char)0xeb) {
678
1
        VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror,
679
1
            "WARN: No ESI processing, "
680
1
            "first char not '<' but BOM."
681
1
            " (See feature esi_remove_bom)"
682
1
        );
683
1
        vep->state = VEP_NOTXML;
684
84
      } else if (p < e) {
685
76
        VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror,
686
76
            "WARN: No ESI processing, "
687
76
            "first char not '<'."
688
76
            " (See feature esi_disable_xml_check)"
689
76
        );
690
76
        vep->state = VEP_NOTXML;
691
76
      }
692
666k
    } else if (vep->state == VEP_NOTXML) {
693
      /*
694
       * This is not recognized as XML, just skip thru
695
       * vfp_esi_end() will handle the rest
696
       */
697
77
      p = e;
698
77
      vep_mark_verbatim(vep, p);
699
700
    /******************************************************
701
     * SECTION B
702
     */
703
704
666k
    } else if (vep->state == VEP_NOTMYTAG) {
705
25.0k
      if (vep->flags.esi_ignore_other_elements) {
706
24.8k
        p++;
707
24.8k
        vep->state = VEP_NEXTTAG;
708
24.8k
      } else {
709
273
        vep->tag_i = 0;
710
36.2k
        while (p < e) {
711
36.2k
          if (*p++ == '>') {
712
259
            vep->state = VEP_NEXTTAG;
713
259
            break;
714
259
          }
715
36.2k
        }
716
273
      }
717
25.0k
      if (p == e && !vep->remove)
718
47
        vep_mark_verbatim(vep, p);
719
641k
    } else if (vep->state == VEP_NEXTTAG) {
720
      /*
721
       * Hunt for start of next tag and keep an eye
722
       * out for end of EsiCmt if armed.
723
       */
724
107k
      vep->emptytag = 0;
725
107k
      vep->attr = NULL;
726
107k
      vep->dostuff = NULL;
727
14.6M
      while (p < e && *p != '<') {
728
14.5M
        if (vep->esicmt_p == NULL) {
729
14.1M
          p++;
730
14.1M
          continue;
731
14.1M
        }
732
364k
        if (*p != *vep->esicmt_p) {
733
247k
          p++;
734
247k
          vep->esicmt_p = vep->esicmt;
735
247k
          continue;
736
247k
        }
737
116k
        if (!vep->remove && vep->esicmt_p == vep->esicmt)
738
38.4k
          vep_mark_verbatim(vep, p);
739
116k
        p++;
740
116k
        if (*++vep->esicmt_p == '\0') {
741
37.7k
          vep->esi_found = 1;
742
37.7k
          vep->esicmt = NULL;
743
37.7k
          vep->esicmt_p = NULL;
744
          /*
745
           * The end of the esicmt
746
           * should not be emitted.
747
           * But the stuff before should
748
           */
749
37.7k
          vep_mark_skip(vep, p);
750
37.7k
        }
751
116k
      }
752
107k
      if (p < e) {
753
107k
        if (!vep->remove)
754
89.7k
          vep_mark_verbatim(vep, p);
755
107k
        assert(*p == '<');
756
107k
        p++;
757
107k
        vep->state = VEP_STARTTAG;
758
107k
      } else if (vep->esicmt_p == vep->esicmt && !vep->remove)
759
231
        vep_mark_verbatim(vep, p);
760
761
    /******************************************************
762
     * SECTION C
763
     */
764
765
533k
    } else if (vep->state == VEP_STARTTAG) {
766
      /* Start of tag, set up match table */
767
107k
      vep->endtag = 0;
768
107k
      vep->match = vep_match_starttag;
769
107k
      vep->state = VEP_MATCH;
770
426k
    } else if (vep->state == VEP_COMMENT) {
771
1.57k
      vep->esicmt_p = vep->esicmt = NULL;
772
1.57k
      vep->until_p = vep->until = "-->";
773
1.57k
      vep->until_s = VEP_NEXTTAG;
774
1.57k
      vep->state = VEP_UNTIL;
775
424k
    } else if (vep->state == VEP_COMMENTESI) {
776
38.6k
      if (vep->remove)
777
1.78k
        vep_error(vep,
778
1.78k
            "ESI 1.0 Nested <!--esi"
779
1.78k
            " element in <esi:remove>");
780
38.6k
      vep->esicmt_p = vep->esicmt = "-->";
781
38.6k
      vep->state = VEP_NEXTTAG;
782
38.6k
      vep_mark_skip(vep, p);
783
386k
    } else if (vep->state == VEP_CDATA) {
784
      /*
785
       * Easy: just look for the end of CDATA
786
       */
787
3.61k
      vep->until_p = vep->until = "]]>";
788
3.61k
      vep->until_s = VEP_NEXTTAG;
789
3.61k
      vep->state = VEP_UNTIL;
790
382k
    } else if (vep->state == VEP_ESIENDTAG) {
791
2.27k
      vep->endtag = 1;
792
2.27k
      vep->state = VEP_ESITAG;
793
380k
    } else if (vep->state == VEP_ESITAG) {
794
38.1k
      vep->in_esi_tag = 1;
795
38.1k
      vep->esi_found = 1;
796
38.1k
      vep_mark_skip(vep, p);
797
38.1k
      vep->match = vep_match_esi;
798
38.1k
      vep->state = VEP_MATCH;
799
342k
    } else if (vep->state == VEP_ESIINCLUDE) {
800
20.5k
      if (vep->remove) {
801
5.44k
        vep_error(vep,
802
5.44k
            "ESI 1.0 <esi:include> element"
803
5.44k
            " nested in <esi:remove>");
804
5.44k
        vep->state = VEP_TAGERROR;
805
15.0k
      } else if (vep->endtag) {
806
273
        vep_error(vep,
807
273
            "ESI 1.0 </esi:include> illegal end-tag");
808
273
        vep->state = VEP_TAGERROR;
809
14.7k
      } else {
810
14.7k
        vep->dostuff = vep_do_include;
811
14.7k
        vep->state = VEP_INTAG;
812
14.7k
        vep->attr = vep_match_attr_include;
813
14.7k
      }
814
321k
    } else if (vep->state == VEP_ESIREMOVE) {
815
8.22k
      vep->dostuff = vep_do_remove;
816
8.22k
      vep->state = VEP_INTAG;
817
313k
    } else if (vep->state == VEP_ESICOMMENT) {
818
3.23k
      if (vep->remove) {
819
843
        vep_error(vep,
820
843
            "ESI 1.0 <esi:comment> element"
821
843
            " nested in <esi:remove>");
822
843
        vep->state = VEP_TAGERROR;
823
2.39k
      } else if (vep->endtag) {
824
379
        vep_error(vep,
825
379
            "ESI 1.0 </esi:comment> illegal end-tag");
826
379
        vep->state = VEP_TAGERROR;
827
2.01k
      } else {
828
2.01k
        vep->dostuff = vep_do_comment;
829
2.01k
        vep->state = VEP_INTAG;
830
2.01k
      }
831
310k
    } else if (vep->state == VEP_ESIBOGON) {
832
6.13k
      vep_error(vep,
833
6.13k
          "ESI 1.0 <esi:bogus> element");
834
6.13k
      vep->state = VEP_TAGERROR;
835
836
    /******************************************************
837
     * SECTION D
838
     */
839
840
304k
    } else if (vep->state == VEP_INTAG) {
841
40.6k
      vep->tag_i = 0;
842
48.8k
      while (p < e && vct_islws(*p) && !vep->emptytag) {
843
8.20k
        p++;
844
8.20k
        vep->canattr = 1;
845
8.20k
      }
846
40.6k
      if (p < e && *p == '/' && !vep->emptytag) {
847
4.24k
        p++;
848
4.24k
        vep->emptytag = 1;
849
4.24k
        vep->canattr = 0;
850
4.24k
      }
851
40.6k
      if (p < e && *p == '>') {
852
16.7k
        p++;
853
16.7k
        AN(vep->dostuff);
854
16.7k
        vep_mark_skip(vep, p);
855
16.7k
        vep->dostuff(vep, DO_TAG);
856
16.7k
        vep->in_esi_tag = 0;
857
16.7k
        vep->state = VEP_NEXTTAG;
858
23.8k
      } else if (p < e && vep->emptytag) {
859
516
        vep_error(vep,
860
516
            "XML 1.0 '>' does not follow '/' in tag");
861
516
        vep->state = VEP_TAGERROR;
862
23.3k
      } else if (p < e && vep->canattr &&
863
21.8k
          vct_isxmlnamestart(*p)) {
864
21.0k
        vep->state = VEP_ATTR;
865
21.0k
      } else if (p < e) {
866
2.27k
        vep_error(vep,
867
2.27k
            "XML 1.0 Illegal attribute start char");
868
2.27k
        vep->state = VEP_TAGERROR;
869
2.27k
      }
870
263k
    } else if (vep->state == VEP_TAGERROR) {
871
3.92M
      while (p < e && *p != '>')
872
3.90M
        p++;
873
21.1k
      if (p < e) {
874
20.9k
        p++;
875
20.9k
        vep_mark_skip(vep, p);
876
20.9k
        vep->in_esi_tag = 0;
877
20.9k
        vep->state = VEP_NEXTTAG;
878
20.9k
        if (vep->attr_vsb)
879
1.22k
          VSB_destroy(&vep->attr_vsb);
880
20.9k
      }
881
882
    /******************************************************
883
     * SECTION E
884
     */
885
886
242k
    } else if (vep->state == VEP_ATTR) {
887
21.0k
      AZ(vep->attr_delim);
888
21.0k
      if (vep->attr == NULL) {
889
4.97k
        p++;
890
4.97k
        AZ(vep->attr_vsb);
891
4.97k
        vep->state = VEP_SKIPATTR;
892
16.0k
      } else {
893
16.0k
        vep->match = vep->attr;
894
16.0k
        vep->state = VEP_MATCH;
895
16.0k
      }
896
221k
    } else if (vep->state == VEP_SKIPATTR) {
897
29.9k
      while (p < e && vct_isxmlname(*p))
898
21.9k
        p++;
899
7.94k
      if (p < e && *p == '=') {
900
3.51k
        p++;
901
3.51k
        vep->state = VEP_ATTRDELIM;
902
4.43k
      } else if (p < e && *p == '>') {
903
2.15k
        vep->state = VEP_INTAG;
904
2.28k
      } else if (p < e && *p == '/') {
905
403
        vep->state = VEP_INTAG;
906
1.88k
      } else if (p < e && vct_issp(*p)) {
907
1.27k
        vep->state = VEP_INTAG;
908
1.27k
      } else if (p < e) {
909
591
        vep_error(vep,
910
591
            "XML 1.0 Illegal attr char");
911
591
        vep->state = VEP_TAGERROR;
912
591
      }
913
213k
    } else if (vep->state == VEP_ATTRGETVAL) {
914
13.0k
      AZ(vep->attr_vsb);
915
13.0k
      vep->attr_vsb = VSB_new_auto();
916
13.0k
      vep->state = VEP_ATTRDELIM;
917
200k
    } else if (vep->state == VEP_ATTRDELIM) {
918
16.5k
      AZ(vep->attr_delim);
919
16.5k
      if (*p == '"' || *p == '\'') {
920
754
        vep->attr_delim = *p++;
921
754
        vep->state = VEP_ATTRVAL;
922
15.8k
      } else if (!vct_issp(*p)) {
923
14.4k
        vep->attr_delim = ' ';
924
14.4k
        vep->state = VEP_ATTRVAL;
925
14.4k
      } else {
926
1.41k
        vep_error(vep,
927
1.41k
            "XML 1.0 Illegal attribute delimiter");
928
1.41k
        vep->state = VEP_TAGERROR;
929
1.41k
      }
930
931
183k
    } else if (vep->state == VEP_ATTRVAL) {
932
17.4M
      while (p < e && *p != '>' && *p != vep->attr_delim &&
933
17.4M
         (vep->attr_delim != ' ' || !vct_issp(*p))) {
934
17.4M
        if (vep->attr_vsb != NULL)
935
17.2M
          VSB_putc(vep->attr_vsb, *p);
936
17.4M
        p++;
937
17.4M
      }
938
15.1k
      if (p < e && *p == '>') {
939
1.51k
        vep_error(vep,
940
1.51k
            "XML 1.0 Missing end attribute delimiter");
941
1.51k
        vep->state = VEP_TAGERROR;
942
1.51k
        vep->attr_delim = 0;
943
1.51k
        if (vep->attr_vsb != NULL) {
944
1.19k
          AZ(VSB_finish(vep->attr_vsb));
945
1.19k
          VSB_destroy(&vep->attr_vsb);
946
1.19k
        }
947
13.6k
      } else if (p < e) {
948
13.5k
        vep->attr_delim = 0;
949
13.5k
        p++;
950
13.5k
        vep->state = VEP_INTAG;
951
13.5k
        if (vep->attr_vsb != NULL) {
952
10.6k
          AZ(VSB_finish(vep->attr_vsb));
953
10.6k
          AN(vep->dostuff);
954
10.6k
          vep->dostuff(vep, DO_ATTR);
955
10.6k
          vep->attr_vsb = NULL;
956
10.6k
        }
957
13.5k
      }
958
959
    /******************************************************
960
     * Utility Section
961
     */
962
963
168k
    } else if (vep->state == VEP_MATCH) {
964
      /*
965
       * Match against a table
966
       */
967
163k
      vm = vep_match(vep, p, e);
968
163k
      vep->match_hit = vm;
969
163k
      if (vm != NULL) {
970
163k
        if (vm->match != NULL)
971
129k
          p += vstrlen(vm->match);
972
163k
        vep->state = *vm->state;
973
163k
        vep->match = NULL;
974
163k
        vep->tag_i = 0;
975
163k
      } else {
976
40
        assert(p + sizeof(vep->tag) >= e);
977
40
        memcpy(vep->tag, p, e - p);
978
40
        vep->tag_i = e - p;
979
40
        vep->state = VEP_MATCHBUF;
980
40
        p = e;
981
40
      }
982
163k
    } else if (vep->state == VEP_MATCHBUF) {
983
      /*
984
       * Match against a table while split over input
985
       * sections.
986
       */
987
0
      AN(vep->match);
988
0
      i = sizeof(vep->tag) - vep->tag_i;
989
0
      if (i > e - p)
990
0
        i = e - p;
991
0
      memcpy(vep->tag + vep->tag_i, p, i);
992
0
      vm = vep_match(vep, vep->tag,
993
0
          vep->tag + vep->tag_i + i);
994
0
      Debug("MB (%.*s) tag_i %d i %d = vm %p match %s\n",
995
0
          vep->tag_i + i, vep->tag,
996
0
          vep->tag_i,
997
0
          i,
998
0
          vm,
999
0
          vm ? vm->match : "(nil)");
1000
1001
0
      if (vm == NULL) {
1002
0
        vep->tag_i += i;
1003
0
        p += i;
1004
0
        assert(p == e);
1005
0
      } else {
1006
0
        vep->match_hit = vm;
1007
0
        vep->state = *vm->state;
1008
0
        if (vm->match != NULL) {
1009
0
          i = vstrlen(vm->match);
1010
0
          if (i > vep->tag_i)
1011
0
            p += i - vep->tag_i;
1012
0
        }
1013
0
        vep->match = NULL;
1014
0
        vep->tag_i = 0;
1015
0
      }
1016
5.19k
    } else if (vep->state == VEP_UNTIL) {
1017
      /*
1018
       * Skip until we see magic string
1019
       */
1020
12.9M
      while (p < e) {
1021
12.9M
        if (*p++ != *vep->until_p++) {
1022
12.9M
          vep->until_p = vep->until;
1023
12.9M
        } else if (*vep->until_p == '\0') {
1024
5.12k
          vep->state = vep->until_s;
1025
5.12k
          break;
1026
5.12k
        }
1027
12.9M
      }
1028
5.19k
      if (p == e && !vep->remove)
1029
69
        vep_mark_verbatim(vep, p);
1030
5.19k
    } else {
1031
0
      Debug("*** Unknown state %s\n", vep->state);
1032
0
      WRONG("WRONG ESI PARSER STATE");
1033
0
    }
1034
672k
  }
1035
  /*
1036
   * We must always mark up the storage we got, try to do so
1037
   * in the most efficient way, in particular with respect to
1038
   * minimizing and limiting use of pending.
1039
   */
1040
1.62k
  if (p == vep->ver_p)
1041
1.01k
    ;
1042
611
  else if (vep->in_esi_tag)
1043
354
    vep_mark_skip(vep, p);
1044
257
  else if (vep->remove)
1045
31
    vep_mark_skip(vep, p);
1046
226
  else
1047
226
    vep_mark_pending(vep, p);
1048
1.62k
}
1049
1050
/*---------------------------------------------------------------------
1051
 */
1052
1053
static ssize_t v_matchproto_(vep_callback_t)
1054
vep_default_cb(struct vfp_ctx *vc, void *priv, ssize_t l, enum vgz_flag flg)
1055
466k
{
1056
466k
  ssize_t *s;
1057
1058
466k
  CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC);
1059
466k
  AN(priv);
1060
466k
  s = priv;
1061
466k
  *s += l;
1062
466k
  (void)flg;
1063
466k
  return (*s);
1064
466k
}
1065
1066
/*---------------------------------------------------------------------
1067
 */
1068
1069
struct vep_state *
1070
VEP_Init(struct vfp_ctx *vc, const struct http *req, vep_callback_t *cb,
1071
    void *cb_priv, struct vep_flags flags)
1072
1.62k
{
1073
1.62k
  struct vep_state *vep;
1074
1075
1.62k
  CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC);
1076
1.62k
  CHECK_OBJ_NOTNULL(req, HTTP_MAGIC);
1077
1.62k
  vep = WS_Alloc(vc->resp->ws, sizeof *vep);
1078
1.62k
  if (vep == NULL) {
1079
0
    VSLb(vc->wrk->vsl, SLT_VCL_Error,
1080
0
         "VEP_Init() workspace overflow");
1081
0
    return (NULL);
1082
0
  }
1083
1084
1.62k
  INIT_OBJ(vep, VEP_MAGIC);
1085
1.62k
  vep->flags = flags;
1086
1.62k
  vep->url = req->hd[HTTP_HDR_URL].b;
1087
1.62k
  vep->vc = vc;
1088
1.62k
  vep->vsb = VSB_new_auto();
1089
1.62k
  AN(vep->vsb);
1090
1091
1.62k
  if (cb != NULL) {
1092
0
    vep->dogzip = 1;
1093
    /* XXX */
1094
0
    VSB_printf(vep->vsb, "%c", VEC_GZ);
1095
0
    vep->cb = cb;
1096
0
    vep->cb_priv = cb_priv;
1097
1.62k
  } else {
1098
1.62k
    vep->cb = vep_default_cb;
1099
1.62k
    vep->cb_priv = &vep->cb_x;
1100
1.62k
  }
1101
1102
1.62k
  vep->state = VEP_START;
1103
1.62k
  vep->crc = crc32(0L, Z_NULL, 0);
1104
1.62k
  vep->crcp = crc32(0L, Z_NULL, 0);
1105
1106
1.62k
  vep->startup = 1;
1107
1.62k
  return (vep);
1108
1.62k
}
1109
1110
/*---------------------------------------------------------------------
1111
 */
1112
1113
struct vsb *
1114
VEP_Finish(struct vep_state *vep)
1115
1.62k
{
1116
1.62k
  ssize_t l, lcb;
1117
1118
1.62k
  CHECK_OBJ_NOTNULL(vep, VEP_MAGIC);
1119
1120
1.62k
  if (vep->include_src)
1121
43
    VSB_destroy(&vep->include_src);
1122
1.62k
  if (vep->attr_vsb)
1123
42
    VSB_destroy(&vep->attr_vsb);
1124
1125
1.62k
  if (vep->state != VEP_START &&
1126
1.61k
      vep->state != VEP_BOM &&
1127
1.61k
      vep->state != VEP_TESTXML &&
1128
1.60k
      vep->state != VEP_NOTXML &&
1129
1.52k
      vep->state != VEP_NEXTTAG) {
1130
583
    vep_error(vep, "VEP ended inside a tag");
1131
583
  }
1132
1133
1.62k
  if (vep->o_pending)
1134
226
    vep_mark_common(vep, vep->ver_p, vep->last_mark);
1135
1.62k
  if (vep->o_wait > 0) {
1136
1.59k
    lcb = vep->cb(vep->vc, vep->cb_priv, 0, VGZ_ALIGN);
1137
1.59k
    vep_emit_common(vep, lcb - vep->o_last, vep->last_mark);
1138
1.59k
  }
1139
  // NB: We don't account for PAD+SUM+LEN in gzipped objects
1140
1.62k
  (void)vep->cb(vep->vc, vep->cb_priv, 0, VGZ_FINISH);
1141
1142
1.62k
  AZ(VSB_finish(vep->vsb));
1143
1.62k
  l = VSB_len(vep->vsb);
1144
1.62k
  if (vep->esi_found && l > 0)
1145
1.11k
    return (vep->vsb);
1146
511
  VSB_destroy(&vep->vsb);
1147
  return (NULL);
1148
1.62k
}
1149
1150
#if 0
1151
1152
digraph xml {
1153
  rankdir="LR"
1154
  size="7,10"
1155
#################################################################
1156
# SECTION A
1157
#
1158
1159
START   [shape=ellipse]
1160
TESTXML   [shape=ellipse]
1161
NOTXML    [shape=ellipse]
1162
NEXTTAGa  [shape=hexagon, label="NEXTTAG"]
1163
STARTTAGa [shape=hexagon, label="STARTTAG"]
1164
START   -> TESTXML
1165
START   -> NEXTTAGa [style=dotted, label="syntax:1"]
1166
TESTXML   -> TESTXML  [label="lws"]
1167
TESTXML   -> NOTXML
1168
TESTXML   -> STARTTAGa  [label="'<'"]
1169
1170
#################################################################
1171
# SECTION B
1172
1173
NOTMYTAG  [shape=ellipse]
1174
NEXTTAG   [shape=ellipse]
1175
NOTMYTAG  -> NEXTTAG  [style=dotted, label="syntax:2"]
1176
STARTTAGb [shape=hexagon, label="STARTTAG"]
1177
NOTMYTAG  -> NEXTTAG  [label="'>'"]
1178
NOTMYTAG  -> NOTMYTAG [label="*"]
1179
NEXTTAG   -> NEXTTAG  [label="'-->'"]
1180
NEXTTAG   -> NEXTTAG  [label="*"]
1181
NEXTTAG   -> STARTTAGb  [label="'<'"]
1182
1183
#################################################################
1184
# SECTION C
1185
1186
STARTTAG  [shape=ellipse]
1187
COMMENT   [shape=ellipse]
1188
CDATA   [shape=ellipse]
1189
ESITAG    [shape=ellipse]
1190
ESIETAG   [shape=ellipse]
1191
ESIINCLUDE  [shape=ellipse]
1192
ESIREMOVE [shape=ellipse]
1193
ESICOMMENT  [shape=ellipse]
1194
ESIBOGON  [shape=ellipse]
1195
INTAGc    [shape=hexagon, label="INTAG"]
1196
NOTMYTAGc [shape=hexagon, label="NOTMYTAG"]
1197
NEXTTAGc  [shape=hexagon, label="NEXTTAG"]
1198
TAGERRORc [shape=hexagon, label="TAGERROR"]
1199
C1    [shape=circle,label=""]
1200
STARTTAG  -> COMMENT  [label="'<!--'"]
1201
STARTTAG  -> ESITAG [label="'<esi'"]
1202
STARTTAG  -> CDATA  [label="'<![CDATA['"]
1203
STARTTAG  -> NOTMYTAGc  [label="'*'"]
1204
COMMENT   -> NEXTTAGc [label="'esi'"]
1205
COMMENT   -> C1   [label="*"]
1206
C1    -> C1   [label="*"]
1207
C1    -> NEXTTAGc [label="-->"]
1208
CDATA   -> CDATA  [label="*"]
1209
CDATA   -> NEXTTAGc [label="]]>"]
1210
ESITAG    -> ESIINCLUDE [label="'include'"]
1211
ESITAG    -> ESIREMOVE  [label="'remove'"]
1212
ESITAG    -> ESICOMMENT [label="'comment'"]
1213
ESITAG    -> ESIBOGON [label="*"]
1214
ESICOMMENT  -> INTAGc
1215
ESICOMMENT  -> TAGERRORc
1216
ESICOMMENT  -> TAGERRORc  [style=dotted, label="nested\nin\nremove"]
1217
ESIREMOVE -> INTAGc
1218
ESIREMOVE -> TAGERRORc
1219
ESIINCLUDE  -> INTAGc
1220
ESIINCLUDE  -> TAGERRORc
1221
ESIINCLUDE  -> TAGERRORc  [style=dotted, label="nested\nin\nremove"]
1222
ESIBOGON  -> TAGERRORc
1223
1224
#################################################################
1225
# SECTION D
1226
1227
INTAG   [shape=ellipse]
1228
TAGERROR  [shape=ellipse]
1229
NEXTTAGd  [shape=hexagon, label="NEXTTAG"]
1230
ATTRd   [shape=hexagon, label="ATTR"]
1231
D1    [shape=circle, label=""]
1232
D2    [shape=circle, label=""]
1233
INTAG   -> D1   [label="lws"]
1234
D1    -> D2   [label="/"]
1235
INTAG   -> D2   [label="/"]
1236
INTAG   -> NEXTTAGd [label=">"]
1237
D1    -> NEXTTAGd [label=">"]
1238
D2    -> NEXTTAGd [label=">"]
1239
D1    -> ATTRd  [label="XMLstartchar"]
1240
D1    -> TAGERROR [label="*"]
1241
D2    -> TAGERROR [label="*"]
1242
TAGERROR  -> TAGERROR [label="*"]
1243
TAGERROR  -> NEXTTAGd [label="'>'"]
1244
1245
#################################################################
1246
# SECTION E
1247
1248
ATTR    [shape=ellipse]
1249
SKIPATTR  [shape=ellipse]
1250
ATTRGETVAL  [shape=ellipse]
1251
ATTRDELIM [shape=ellipse]
1252
ATTRVAL   [shape=ellipse]
1253
TAGERRORe [shape=hexagon, label="TAGERROR"]
1254
INTAGe    [shape=hexagon, label="INTAG"]
1255
ATTR    -> SKIPATTR [label="*"]
1256
ATTR    -> ATTRGETVAL [label="wanted attr"]
1257
SKIPATTR  -> SKIPATTR [label="XMLname"]
1258
SKIPATTR  -> ATTRDELIM  [label="'='"]
1259
SKIPATTR  -> TAGERRORe  [label="*"]
1260
ATTRGETVAL  -> ATTRDELIM
1261
ATTRDELIM -> ATTRVAL  [label="\""]
1262
ATTRDELIM -> ATTRVAL  [label="\'"]
1263
ATTRDELIM -> ATTRVAL  [label="*"]
1264
ATTRDELIM -> TAGERRORe  [label="lws"]
1265
ATTRVAL   -> TAGERRORe  [label="'>'"]
1266
ATTRVAL   -> INTAGe [label="delim"]
1267
ATTRVAL   -> ATTRVAL  [label="*"]
1268
1269
}
1270
1271
#endif