/src/vinyl-cache/bin/vinyld/cache/cache_esi_parse.c
Line | Count | Source |
1 | | /*- |
2 | | * Copyright (c) 2011 Varnish Software AS |
3 | | * All rights reserved. |
4 | | * |
5 | | * Author: Poul-Henning Kamp <phk@phk.freebsd.dk> |
6 | | * |
7 | | * SPDX-License-Identifier: BSD-2-Clause |
8 | | * |
9 | | * Redistribution and use in source and binary forms, with or without |
10 | | * modification, are permitted provided that the following conditions |
11 | | * are met: |
12 | | * 1. Redistributions of source code must retain the above copyright |
13 | | * notice, this list of conditions and the following disclaimer. |
14 | | * 2. Redistributions in binary form must reproduce the above copyright |
15 | | * notice, this list of conditions and the following disclaimer in the |
16 | | * documentation and/or other materials provided with the distribution. |
17 | | * |
18 | | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
19 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
21 | | * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE |
22 | | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
24 | | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
25 | | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
26 | | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
27 | | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
28 | | * SUCH DAMAGE. |
29 | | * |
30 | | * VEP Vinyl Esi Parsing |
31 | | */ |
32 | | |
33 | | #include "config.h" |
34 | | |
35 | | #include "cache_vinyld.h" |
36 | | #include "cache_filter.h" |
37 | | |
38 | | #include "cache_vgz.h" |
39 | | #include "cache_esi.h" |
40 | | #include "vct.h" |
41 | | #include "vend.h" |
42 | | #include "vgz.h" |
43 | | |
44 | | //#define Debug(fmt, ...) printf(fmt, __VA_ARGS__) |
45 | | #define Debug(fmt, ...) /**/ |
46 | | |
/* Forward declaration so the handler typedef below can name the parser state. */
47 | | struct vep_state; |
48 | |
/*
 * Why a dostuff_f handler is being called: DO_ATTR when an attribute is
 * handed to it, DO_TAG once the closing '>' of the tag has been consumed
 * (see the VEP_INTAG handling in VEP_Parse()).
 */
49 | | enum dowhat {DO_ATTR, DO_TAG}; |
50 | | typedef void dostuff_f(struct vep_state *, enum dowhat); |
51 | |
/*
 * One entry of a match table: the literal to compare the input against and
 * the address of the state-name pointer associated with it.
 * Every table ends with an entry whose match is NULL.
 */
52 | | struct vep_match { |
53 | | const char *match; |
54 | | const char * const *state; |
55 | | }; |
56 | |
/* Mode of a marked stretch of input; see vep_mark_common(). */
57 | | enum vep_mark { VERBATIM = 0, SKIP }; |
58 | | |
/*
 * Complete state of one ESI parse.
 *
 * Only the fields whose use is visible in this part of the file are
 * described; the others are left without commentary on purpose.
 */
59 | | struct vep_state { |
60 | | unsigned magic; |
61 | | #define VEP_MAGIC 0x55cb9b82 |
62 | | // flags from bereq |
63 | | struct vep_flags flags; |
64 | | |
/* Output buffer the VEC records and include strings are appended to */
65 | | struct vsb *vsb; |
66 | | |
/* URL used as base when an include src= is relative */
67 | | const char *url; |
68 | | struct vfp_ctx *vc; |
/* When set, vep_emit_verbatim() also emits CRC length and CRC value */
69 | | int dogzip; |
/* Called from vep_mark_common() with the number of bytes consumed */
70 | | vep_callback_t *cb; |
71 | | void *cb_priv; |
72 | | |
73 | | /* Internal Counter for default call-back function */ |
74 | | ssize_t cb_x; |
75 | | |
76 | | /* parser state */ |
/* Points at one of the VEP_* state-name strings; compared by pointer */
77 | | const char *state; |
78 | | unsigned startup; |
79 | | unsigned esi_found; |
80 | | |
/* Current tag is an end tag ("/esi:") / ended in "/" / may take attributes */
81 | | unsigned endtag; |
82 | | unsigned emptytag; |
83 | | unsigned canattr; |
84 | | |
/* Non-zero while inside an open <esi:remove> element */
85 | | unsigned remove; |
86 | | |
/*
 * Output accounting, maintained by vep_mark_*() and vep_emit_common():
 *   o_wait	bytes marked in the current mode and not yet emitted
 *   o_pending	bytes marked pending, crcp is the CRC32 over them
 *   o_total	sum of all lengths emitted so far
 *   crc/o_crc	running CRC32 and the number of bytes it covers
 *   o_last	callback offset at the most recent emit
 */
87 | | ssize_t o_wait; |
88 | | ssize_t o_pending; |
89 | | ssize_t o_total; |
90 | | uint32_t crc; |
91 | | ssize_t o_crc; |
92 | | uint32_t crcp; |
93 | | ssize_t o_last; |
94 | | |
/* ver_p: start of the input that has not been marked yet */
95 | | const char *hack_p; |
96 | | const char *ver_p; |
97 | | |
98 | | const char *until; |
99 | | const char *until_p; |
100 | | const char *until_s; |
101 | | |
102 | | int in_esi_tag; |
103 | | |
/* Terminator of an open "<!--esi" comment and the match position in it */
104 | | const char *esicmt; |
105 | | const char *esicmt_p; |
106 | | |
/* Attribute match table of the current tag and the value being collected */
107 | | struct vep_match *attr; |
108 | | struct vsb *attr_vsb; |
109 | | int attr_delim; |
110 | | |
/* Table handed to vep_match() and the entry it last hit */
111 | | struct vep_match *match; |
112 | | struct vep_match *match_hit; |
113 | | |
/* vep_match() asserts that no match literal is longer than this buffer */
114 | | char tag[8]; |
115 | | int tag_i; |
116 | | |
/* Handler for the ESI element currently being parsed */
117 | | dostuff_f *dostuff; |
118 | | |
/* src= value of the current <esi:include>, and whether onerror="continue" */
119 | | struct vsb *include_src; |
120 | | unsigned include_continue; |
121 | | |
/* Call counters for vep_mark_skip/verbatim/pending, and the last mode marked */
122 | | unsigned nm_skip; |
123 | | unsigned nm_verbatim; |
124 | | unsigned nm_pending; |
125 | | enum vep_mark last_mark; |
126 | | }; |
127 | | |
128 | | /*---------------------------------------------------------------------*/ |
129 | | |
/*
 * Names of the parser states.
 *
 * vep->state holds one of these pointers and VEP_Parse() compares the
 * pointers themselves (vep->state == VEP_START), so the text is only
 * used when the state is printed by Debug().
 */
130 | | static const char * const VEP_START = "[Start]"; |
131 | | static const char * const VEP_BOM = "[BOM]"; |
132 | | static const char * const VEP_TESTXML = "[TestXml]"; |
133 | | static const char * const VEP_NOTXML = "[NotXml]"; |
134 | | |
135 | | static const char * const VEP_NEXTTAG = "[NxtTag]"; |
136 | | static const char * const VEP_NOTMYTAG = "[NotMyTag]"; |
137 | | |
138 | | static const char * const VEP_STARTTAG = "[StartTag]"; |
139 | | static const char * const VEP_COMMENTESI = "[CommentESI]"; |
140 | | static const char * const VEP_COMMENT = "[Comment]"; |
141 | | static const char * const VEP_CDATA = "[CDATA]"; |
142 | | static const char * const VEP_ESITAG = "[ESITag]"; |
143 | | static const char * const VEP_ESIENDTAG = "[/ESITag]"; |
144 | | |
145 | | static const char * const VEP_ESIREMOVE = "[ESI:Remove]"; |
146 | | static const char * const VEP_ESIINCLUDE = "[ESI:Include]"; |
147 | | static const char * const VEP_ESICOMMENT = "[ESI:Comment]"; |
148 | | static const char * const VEP_ESIBOGON = "[ESI:Bogon]"; |
149 | | |
150 | | static const char * const VEP_INTAG = "[InTag]"; |
151 | | static const char * const VEP_TAGERROR = "[TagError]"; |
152 | | |
153 | | static const char * const VEP_ATTR = "[Attribute]"; |
154 | | static const char * const VEP_SKIPATTR = "[SkipAttribute]"; |
155 | | static const char * const VEP_ATTRDELIM = "[AttrDelim]"; |
156 | | static const char * const VEP_ATTRGETVAL = "[AttrGetValue]"; |
157 | | static const char * const VEP_ATTRVAL = "[AttrValue]"; |
158 | | |
159 | | static const char * const VEP_UNTIL = "[Until]"; |
160 | | static const char * const VEP_MATCHBUF = "[MatchBuf]"; |
161 | | static const char * const VEP_MATCH = "[Match]"; |
162 | | |
163 | | /*---------------------------------------------------------------------*/ |
164 | | |
/*
 * What may follow a '<'.
 * vep_match() returns the first entry that matches completely, so the
 * longer "!--esi" and "!---->" have to stay ahead of their prefix "!--".
 * The NULL entry is what vep_match() returns when nothing matched.
 */
165 | | static struct vep_match vep_match_starttag[] = { |
166 | | { "!--esi", &VEP_COMMENTESI }, |
167 | | { "!---->", &VEP_NEXTTAG }, |
168 | | { "!--", &VEP_COMMENT }, |
169 | | { "/esi:", &VEP_ESIENDTAG }, |
170 | | { "esi:", &VEP_ESITAG }, |
171 | | { "![CDATA[", &VEP_CDATA }, |
172 | | { NULL, &VEP_NOTMYTAG } |
173 | | }; |
174 | | |
175 | | /*---------------------------------------------------------------------*/ |
176 | | |
/* Element names accepted after "esi:"; anything else ends up as a bogon */
177 | | static struct vep_match vep_match_esi[] = { |
178 | | { "include", &VEP_ESIINCLUDE }, |
179 | | { "remove", &VEP_ESIREMOVE }, |
180 | | { "comment", &VEP_ESICOMMENT }, |
181 | | { NULL, &VEP_ESIBOGON } |
182 | | }; |
183 | | |
184 | | /*---------------------------------------------------------------------*/ |
185 | | |
/*
 * Attributes of <esi:include> whose value is collected; these are the two
 * names vep_do_include() knows how to handle.
 */
186 | | static struct vep_match vep_match_attr_include[] = { |
187 | | { "src=", &VEP_ATTRGETVAL }, |
188 | | { "onerror=", &VEP_ATTRGETVAL }, |
189 | | { NULL, &VEP_SKIPATTR } |
190 | | }; |
191 | | |
192 | | /*---------------------------------------------------------------------*/ |
193 | | |
/*
 * Leading byte sequence looked for when flags.esi_remove_bom is set.
 * NOTE(review): the UTF-8 byte order mark is EF BB BF, this literal starts
 * with EB.  VEP_Parse() tests for the same 0xeb first byte, so the two
 * agree with each other -- confirm whether EB is intended before changing
 * either place.
 */
194 | | static struct vep_match vep_match_bom[] = { |
195 | | { "\xeb\xbb\xbf", &VEP_START }, |
196 | | { NULL, &VEP_BOM } |
197 | | }; |
198 | | |
199 | | /*-------------------------------------------------------------------- |
200 | | * Report a parsing error |
201 | | */ |
202 | | |
/*
 * Counts the error in VSC_C_main->esi_errors and logs an SLT_ESI_xmlerror
 * record of the form "ERR: after <o_last> <p>", where o_last is the
 * callback offset recorded at the most recent emit.
 */
203 | | static void |
204 | | vep_error(const struct vep_state *vep, const char *p) |
205 | 31.5k | { |
206 | 31.5k | VSC_C_main->esi_errors++; |
207 | 31.5k | VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror, "ERR: after %zd %s", |
208 | 31.5k | vep->o_last, p); |
209 | 31.5k | } |
210 | | |
211 | | /*-------------------------------------------------------------------- |
212 | | * Report a parsing warning |
213 | | */ |
214 | | |
/*
 * Same as vep_error() but counts in VSC_C_main->esi_warnings and prefixes
 * the log record with "WARN:".  The record still uses SLT_ESI_xmlerror.
 */
215 | | static void |
216 | | vep_warn(const struct vep_state *vep, const char *p) |
217 | 9.75k | { |
218 | 9.75k | VSC_C_main->esi_warnings++; |
219 | 9.75k | VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror, "WARN: after %zd %s", |
220 | 9.75k | vep->o_last, p); |
221 | 9.75k | } |
222 | | |
223 | | /*--------------------------------------------------------------------- |
224 | | * return match or NULL if more input needed. |
225 | | */ |
226 | | |
/*
 * Compare the input [b, e) against the entries of vep->match, in table
 * order.
 *
 * Returns:
 *   - the first entry whose literal is completely present at b,
 *   - NULL if the input ran out while an entry was still matching, i.e.
 *     the caller has to come back with more input,
 *   - the terminating entry (match == NULL) if no entry matched.
 *
 * Every literal must fit in vep->tag; this is asserted for each entry
 * that gets examined.
 */
227 | | static struct vep_match * |
228 | | vep_match(const struct vep_state *vep, const char *b, const char *e) |
229 | 163k | { |
230 | 163k | struct vep_match *vm; |
231 | 163k | const char *q, *r; |
232 | | |
233 | 163k | AN(vep->match); |
234 | 524k | for (vm = vep->match; vm->match != NULL; vm++) { |
235 | 490k | assert(vstrlen(vm->match) <= sizeof (vep->tag)); |
236 | 490k | r = b; |
/* Advance q (literal) and r (input) in step until they differ */
237 | 1.24M | for (q = vm->match; *q != '\0' && r < e; q++, r++) |
238 | 1.11M | if (*q != *r) |
239 | 361k | break; |
/* Reached the end of the literal: full match */
240 | 490k | if (*q == '\0') |
241 | 129k | break; |
/* No mismatch seen but the input is exhausted: undecided */
242 | 361k | if (r == e) |
243 | 40 | return (NULL); |
244 | 361k | } |
245 | 163k | return (vm); |
246 | 163k | } |
247 | | |
248 | | /*--------------------------------------------------------------------- |
249 | | * |
250 | | */ |
251 | | |
/*
 * Append a length record to vep->vsb: one opcode byte followed by the
 * length l in the smallest of three encodings.
 *
 *   l < 256	opcode m8,  1 length byte	(2 bytes total)
 *   l < 65536	opcode m16, 2 length bytes	(3 bytes total)
 *   otherwise	opcode m64, 8 length bytes	(9 bytes total)
 *
 * The multi-byte lengths are written with vbe16enc()/vbe64enc()
 * (presumably big-endian, going by the names -- confirm in vend.h).
 * l must be positive; each encoding is read back and asserted to equal l.
 */
252 | | static void |
253 | | vep_emit_len(const struct vep_state *vep, ssize_t l, int m8, int m16, int m64) |
254 | 177k | { |
255 | 177k | uint8_t buf[9]; |
256 | | |
257 | 177k | assert(l > 0); |
258 | 177k | if (l < 256) { |
259 | 176k | buf[0] = (uint8_t)m8; |
260 | 176k | buf[1] = (uint8_t)l; |
261 | 176k | assert((ssize_t)buf[1] == l); |
262 | 176k | VSB_bcat(vep->vsb, buf, 2); |
263 | 176k | } else if (l < 65536) { |
264 | 1.34k | buf[0] = (uint8_t)m16; |
265 | 1.34k | vbe16enc(buf + 1, (uint16_t)l); |
266 | 1.34k | assert((ssize_t)vbe16dec(buf + 1) == l); |
267 | 1.34k | VSB_bcat(vep->vsb, buf, 3); |
268 | 1.34k | } else { |
269 | 158 | buf[0] = (uint8_t)m64; |
270 | 158 | vbe64enc(buf + 1, l); |
271 | 158 | assert((ssize_t)vbe64dec(buf + 1) == l); |
272 | 158 | VSB_bcat(vep->vsb, buf, 9); |
273 | 158 | } |
274 | 177k | } |
275 | | |
/* Emit a length record for l bytes using the VEC_S1/S2/S8 opcodes. */
276 | | static void |
277 | | vep_emit_skip(const struct vep_state *vep, ssize_t l) |
278 | 99.8k | { |
279 | | |
280 | 99.8k | vep_emit_len(vep, l, VEC_S1, VEC_S2, VEC_S8); |
281 | 99.8k | } |
282 | | |
/*
 * Emit a length record for l bytes using the VEC_V1/V2/V8 opcodes.
 * When vep->dogzip is set this is followed by a second length record for
 * l_crc (VEC_C1/C2/C8 opcodes) and the four bytes of vep->crc as written
 * by vbe32enc().
 */
284 | | static void |
285 | | vep_emit_verbatim(const struct vep_state *vep, ssize_t l, ssize_t l_crc) |
286 | 77.9k | { |
287 | 77.9k | uint8_t buf[4]; |
288 | | |
289 | 77.9k | vep_emit_len(vep, l, VEC_V1, VEC_V2, VEC_V8); |
290 | 77.9k | if (vep->dogzip) { |
291 | 0 | vep_emit_len(vep, l_crc, VEC_C1, VEC_C2, VEC_C8); |
292 | 0 | vbe32enc(buf, vep->crc); |
293 | 0 | VSB_bcat(vep->vsb, buf, sizeof buf); |
294 | 0 | } |
295 | 77.9k | } |
295 | | |
/*
 * Emit a record for l bytes in the given mode and reset the CRC state.
 *
 * l == 0 is a no-op: nothing is emitted and the CRC state is left alone.
 * Otherwise a skip or verbatim record is written (the verbatim one gets
 * vep->o_crc as its CRC length), after which vep->crc is set back to the
 * initial crc32() value, vep->o_crc to zero, and l is added to
 * vep->o_total.
 */
297 | | static void |
298 | | vep_emit_common(struct vep_state *vep, ssize_t l, enum vep_mark mark) |
299 | 179k | { |
300 | | |
301 | 179k | assert(l >= 0); |
302 | 179k | if (l == 0) |
303 | 1.62k | return; |
304 | 177k | assert(mark == SKIP || mark == VERBATIM); |
305 | 177k | if (mark == SKIP) |
306 | 99.8k | vep_emit_skip(vep, l); |
307 | 77.9k | else |
308 | 77.9k | vep_emit_verbatim(vep, l, vep->o_crc); |
309 | | |
310 | 177k | vep->crc = crc32(0L, Z_NULL, 0); |
311 | 177k | vep->o_crc = 0; |
312 | 177k | vep->o_total += l; |
313 | 177k | } |
313 | | |
314 | | /*--------------------------------------------------------------------- |
315 | | * |
316 | | */ |
317 | | |
/*
 * Mark the input from vep->ver_p up to p as belonging to mode `mark`.
 *
 * In order:
 *   1. Return at once if nothing would change (same mode, no new input,
 *      no pending bytes).
 *   2. On a change of mode with bytes waiting (or during startup), call
 *      the callback with length 0 and emit a record for the previous
 *      mode.  The value the callback returns is used as the current
 *      offset: the record length is that value minus vep->o_last.
 *   3. Hand any pending bytes to the callback and merge their CRC
 *      (vep->crcp) into the active CRC.
 *   4. Add [ver_p, p) to the active CRC, advance ver_p to p, and report
 *      the bytes to the callback.
 *
 * The statements depend on each other through vep->crc, o_crc, o_wait and
 * o_last; do not reorder them.
 */
318 | | static void |
319 | | vep_mark_common(struct vep_state *vep, const char *p, enum vep_mark mark) |
320 | 284k | { |
321 | 284k | ssize_t l, lcb; |
322 | | |
323 | 284k | assert(mark == SKIP || mark == VERBATIM); |
324 | | |
325 | | /* The NO-OP case, no data, no pending data & no change of mode */ |
326 | 284k | if (vep->last_mark == mark && p == vep->ver_p && vep->o_pending == 0) |
327 | 0 | return; |
328 | | |
329 | | /* |
330 | | * If we changed mode, emit whatever the opposite mode |
331 | | * assembled before the pending bytes. |
332 | | */ |
333 | | |
334 | 284k | if (vep->last_mark != mark && (vep->o_wait > 0 || vep->startup)) { |
335 | 177k | lcb = vep->cb(vep->vc, vep->cb_priv, 0, |
336 | 177k | mark == VERBATIM ? VGZ_RESET : VGZ_ALIGN); |
337 | 177k | vep_emit_common(vep, lcb - vep->o_last, vep->last_mark); |
338 | 177k | vep->o_last = lcb; |
339 | 177k | vep->o_wait = 0; |
340 | 177k | } |
341 | | |
342 | | /* Transfer pending bytes CRC into active mode CRC */ |
343 | 284k | if (vep->o_pending) { |
344 | 226 | (void)vep->cb(vep->vc, vep->cb_priv, vep->o_pending, |
345 | 226 | VGZ_NORMAL); |
/* Active CRC covers nothing yet: take over the pending CRC as is */
346 | 226 | if (vep->o_crc == 0) { |
347 | 64 | vep->crc = vep->crcp; |
348 | 64 | vep->o_crc = vep->o_pending; |
349 | 162 | } else { |
350 | 162 | vep->crc = crc32_combine(vep->crc, |
351 | 162 | vep->crcp, vep->o_pending); |
352 | 162 | vep->o_crc += vep->o_pending; |
353 | 162 | } |
354 | 226 | vep->crcp = crc32(0L, Z_NULL, 0); |
355 | 226 | vep->o_wait += vep->o_pending; |
356 | 226 | vep->o_pending = 0; |
357 | 226 | } |
358 | | |
359 | | /* * Process this bit of input */ |
360 | 284k | AN(vep->ver_p); |
361 | 284k | l = p - vep->ver_p; |
362 | 284k | assert(l >= 0); |
363 | 284k | vep->crc = crc32(vep->crc, (const void*)vep->ver_p, l); |
364 | 284k | vep->o_crc += l; |
365 | 284k | vep->ver_p = p; |
366 | | |
367 | 284k | vep->o_wait += l; |
368 | 284k | vep->last_mark = mark; |
369 | 284k | (void)vep->cb(vep->vc, vep->cb_priv, l, VGZ_NORMAL); |
370 | 284k | } |
371 | | |
/* Mark the input up to p as VERBATIM and count the call in nm_verbatim. */
372 | | static void |
373 | | vep_mark_verbatim(struct vep_state *vep, const char *p) |
374 | 128k | { |
375 | | |
376 | 128k | vep_mark_common(vep, p, VERBATIM); |
377 | 128k | vep->nm_verbatim++; |
378 | 128k | } |
379 | | |
/* Mark the input up to p as SKIP and count the call in nm_skip. */
380 | | static void |
381 | | vep_mark_skip(struct vep_state *vep, const char *p) |
382 | 154k | { |
383 | | |
384 | 154k | vep_mark_common(vep, p, SKIP); |
385 | 154k | vep->nm_skip++; |
386 | 154k | } |
387 | | |
/*
 * Set the input from vep->ver_p up to p aside as "pending": its bytes go
 * into the separate pending CRC (vep->crcp) and its length into
 * vep->o_pending.  The next vep_mark_common() call folds both into the
 * mode that is active then.  p must lie beyond ver_p (asserted).
 */
388 | | static void |
389 | | vep_mark_pending(struct vep_state *vep, const char *p) |
390 | 226 | { |
391 | 226 | ssize_t l; |
392 | | |
393 | 226 | AN(vep->ver_p); |
394 | 226 | l = p - vep->ver_p; |
395 | 226 | assert(l > 0); |
396 | 226 | vep->crcp = crc32(vep->crcp, (const void *)vep->ver_p, l); |
397 | 226 | vep->ver_p = p; |
398 | | |
399 | 226 | vep->o_pending += l; |
400 | 226 | vep->nm_pending++; |
401 | 226 | } |
402 | | |
403 | | /*--------------------------------------------------------------------- |
404 | | */ |
405 | | |
/*
 * Tag handler for <esi:comment>.  Only ever called with DO_TAG (asserted).
 * Reports an error when the tag was not written as an empty element,
 * i.e. without the final '/'.
 */
406 | | static void v_matchproto_() |
407 | | vep_do_comment(struct vep_state *vep, enum dowhat what) |
408 | 1.79k | { |
409 | 1.79k | Debug("DO_COMMENT(%d)\n", what); |
410 | 1.79k | assert(what == DO_TAG); |
411 | 1.79k | if (!vep->emptytag) |
412 | 1.49k | vep_error(vep, "ESI 1.0 <esi:comment> needs final '/'"); |
413 | 1.79k | } |
414 | | |
415 | | /*--------------------------------------------------------------------- |
416 | | */ |
417 | | |
/*
 * Tag handler for <esi:remove> and </esi:remove>.  Only ever called with
 * DO_TAG (asserted).
 *
 * Errors are reported, and vep->remove left as it is, for an empty
 * element, for an opening tag while a remove is already open, and for a
 * closing tag while none is open.  In all other cases vep->remove becomes
 * 1 for an opening tag and 0 for a closing tag.
 */
418 | | static void v_matchproto_() |
419 | | vep_do_remove(struct vep_state *vep, enum dowhat what) |
420 | 6.54k | { |
421 | 6.54k | Debug("DO_REMOVE(%d, end %d empty %d remove %d)\n", |
422 | 6.54k | what, vep->endtag, vep->emptytag, vep->remove); |
423 | 6.54k | assert(what == DO_TAG); |
424 | 6.54k | if (vep->emptytag) |
425 | 3.20k | vep_error(vep, "ESI 1.0 <esi:remove/> not legal"); |
426 | 3.34k | else if (vep->remove && !vep->endtag) |
427 | 1.76k | vep_error(vep, "ESI 1.0 <esi:remove> already open"); |
428 | 1.57k | else if (!vep->remove && vep->endtag) |
429 | 289 | vep_error(vep, "ESI 1.0 <esi:remove> not open"); |
430 | 1.28k | else |
431 | 1.28k | vep->remove = !vep->endtag; |
432 | 6.54k | } |
433 | | |
434 | | /*--------------------------------------------------------------------- |
435 | | */ |
436 | | |
/*
 * Accept the collected value in vep->attr_vsb as the src= attribute of
 * the current <esi:include>.
 *
 * On success ownership of the buffer moves to vep->include_src and
 * vep->attr_vsb is cleared.  A second src= attribute, or a value
 * containing a character vct_islws() accepts, is reported as an error:
 * the state becomes VEP_TAGERROR and the buffers are destroyed.
 */
437 | | static void |
438 | | include_attr_src(struct vep_state *vep) |
439 | 10.6k | { |
440 | 10.6k | const char *p; |
441 | | |
442 | 10.6k | if (vep->include_src != NULL) { |
443 | 912 | vep_error(vep, |
444 | 912 | "ESI 1.0 <esi:include> " |
445 | 912 | "has multiple src= attributes"); |
446 | 912 | vep->state = VEP_TAGERROR; |
447 | 912 | VSB_destroy(&vep->attr_vsb); |
448 | 912 | VSB_destroy(&vep->include_src); |
449 | 912 | return; |
450 | 912 | } |
/* Stop at the first whitespace character, if there is one */
451 | 12.3M | for (p = VSB_data(vep->attr_vsb); *p != '\0'; p++) |
452 | 12.3M | if (vct_islws(*p)) |
453 | 831 | break; |
454 | 9.69k | if (*p != '\0') { |
455 | 831 | vep_error(vep, |
456 | 831 | "ESI 1.0 <esi:include> " |
457 | 831 | "has whitespace in src= attribute"); |
458 | 831 | vep->state = VEP_TAGERROR; |
459 | 831 | VSB_destroy(&vep->attr_vsb); |
/* include_src is always NULL here, the first check returned otherwise */
460 | 831 | if (vep->include_src != NULL) |
461 | 0 | VSB_destroy(&vep->include_src); |
462 | 831 | return; |
463 | 831 | } |
464 | 8.86k | vep->include_src = vep->attr_vsb; |
465 | 8.86k | vep->attr_vsb = NULL; |
466 | 8.86k | } |
467 | | |
/*
 * Handle the onerror= attribute of <esi:include>: vep->include_continue
 * is set to the negated result of vstrcmp() against "continue"
 * (presumably strcmp() semantics, i.e. 1 on an exact match -- confirm),
 * then the value buffer is destroyed.
 */
468 | | static void |
469 | | include_attr_onerror(struct vep_state *vep) |
470 | 0 | { |
471 |

472 | 0 | vep->include_continue = !vstrcmp("continue", VSB_data(vep->attr_vsb)); |
473 | 0 | VSB_destroy(&vep->attr_vsb); |
474 | 0 | } |
475 | | |
/*
 * Handler for <esi:include>.
 *
 * DO_ATTR: dispatches on the name of the attribute that was matched
 * (vep->match_hit) to include_attr_src() or include_attr_onerror().
 *
 * DO_TAG: warns if the tag lacks the final '/', reports an error if no
 * src= was seen, and otherwise appends an include instruction to
 * vep->vsb:
 *
 *   opcode	VEC_IC if onerror="continue" was given, else VEC_IA
 *   host	"Host: <host>" for http:// (and, when
 *		flags.esi_ignore_https is set, https://) URLs, empty
 *		otherwise; NUL terminated
 *   url	the path, NUL terminated.  A src that does not start with
 *		'/' is prefixed with vep->url up to its last '/' before
 *		any '?'.
 *
 * While the path is copied the five predefined XML entities (&apos;
 * &quot; &lt; &gt; &amp;) are replaced by the character they stand for.
 * vep->include_src is destroyed on every path that gets past the
 * "lacks src attr" check.
 */
476 | | static void v_matchproto_() |
477 | | vep_do_include(struct vep_state *vep, enum dowhat what) |
478 | 19.0k | { |
479 | 19.0k | const char *p, *q, *h; |
480 | 19.0k | ssize_t l; |
481 | 19.0k | char incl; |
482 | | |
483 | 19.0k | Debug("DO_INCLUDE(%d)\n", what); |
484 | 19.0k | if (what == DO_ATTR) { |
485 | 10.6k | Debug("ATTR (%s) (%s)\n", vep->match_hit->match, |
486 | 10.6k | VSB_data(vep->attr_vsb)); |
487 | 10.6k | if (!vstrcmp("src=", vep->match_hit->match)) { |
488 | 10.6k | include_attr_src(vep); |
489 | 10.6k | return; |
490 | 10.6k | } |
491 | 0 | if (!vstrcmp("onerror=", vep->match_hit->match)) { |
492 | 0 | include_attr_onerror(vep); |
493 | 0 | return; |
494 | 0 | } |
495 | 0 | WRONG("Unhandled <esi:include> attribute"); |
496 | 0 | } |
497 | 8.46k | assert(what == DO_TAG); |
498 | 8.46k | if (!vep->emptytag) |
499 | 8.24k | vep_warn(vep, "ESI 1.0 <esi:include> lacks final '/'"); |
500 | 8.46k | if (vep->include_src == NULL) { |
501 | 553 | vep_error(vep, "ESI 1.0 <esi:include> lacks src attr"); |
502 | 553 | return; |
503 | 553 | } |
504 | | |
505 | | /* |
506 | | * Strictly speaking, we ought to spit out any piled up skip before |
507 | | * emitting the VEC for the include, but objectively that makes no |
508 | | * difference and robs us of a chance to collapse another skip into |
509 | | * this one so we don't do that. |
510 | | * However, we cannot tolerate any verbatim stuff piling up. |
511 | | * The mark_skip() before calling dostuff should have taken |
512 | | * care of that. Make sure. |
513 | | */ |
514 | 7.90k | assert(vep->o_wait == 0 || vep->last_mark == SKIP); |
515 | | /* XXX: what if it contains NUL bytes ?? */ |
516 | 7.90k | p = VSB_data(vep->include_src); |
517 | 7.90k | l = VSB_len(vep->include_src); |
518 | 7.90k | h = 0; |
519 | | |
520 | 7.90k | incl = vep->include_continue ? VEC_IC : VEC_IA; |
521 | | |
/* Absolute URL: h is the start of the host, p the first '/' after it */
522 | 7.90k | if (l > 7 && !memcmp(p, "http://", 7)) { |
523 | 1.31k | h = p + 7; |
524 | 1.31k | p = strchr(h, '/'); |
525 | 1.31k | if (p == NULL) { |
526 | 363 | vep_error(vep, |
527 | 363 | "ESI 1.0 <esi:include> invalid src= URL"); |
528 | 363 | vep->state = VEP_TAGERROR; |
529 | 363 | AZ(vep->attr_vsb); |
530 | 363 | VSB_destroy(&vep->include_src); |
531 | 363 | return; |
532 | 363 | } |
533 | 956 | Debug("HOST <%.*s> PATH <%s>\n", (int)(p-h),h, p); |
534 | 956 | VSB_printf(vep->vsb, "%c", incl); |
535 | 956 | VSB_printf(vep->vsb, "Host: %.*s%c", (int)(p-h), h, 0); |
536 | 6.59k | } else if (l > 8 && !memcmp(p, "https://", 8)) { |
537 | 1.50k | if (!vep->flags.esi_ignore_https) { |
538 | 416 | vep_warn(vep, |
539 | 416 | "ESI 1.0 <esi:include> with https:// ignored"); |
540 | 416 | vep->state = VEP_TAGERROR; |
541 | 416 | AZ(vep->attr_vsb); |
542 | 416 | VSB_destroy(&vep->include_src); |
543 | 416 | return; |
544 | 416 | } |
545 | 1.09k | vep_warn(vep, |
546 | 1.09k | "ESI 1.0 <esi:include> https:// treated as http://"); |
547 | 1.09k | h = p + 8; |
548 | 1.09k | p = strchr(h, '/'); |
549 | 1.09k | if (p == NULL) { |
550 | 395 | vep_error(vep, |
551 | 395 | "ESI 1.0 <esi:include> invalid src= URL"); |
552 | 395 | vep->state = VEP_TAGERROR; |
553 | 395 | AZ(vep->attr_vsb); |
554 | 395 | VSB_destroy(&vep->include_src); |
555 | 395 | return; |
556 | 395 | } |
557 | 696 | VSB_printf(vep->vsb, "%c", incl); |
558 | 696 | VSB_printf(vep->vsb, "Host: %.*s%c", (int)(p-h), h, 0); |
559 | 5.08k | } else if (*p == '/') { |
/* Host-relative path: empty host field */
560 | 608 | VSB_printf(vep->vsb, "%c", incl); |
561 | 608 | VSB_printf(vep->vsb, "%c", 0); |
562 | 4.47k | } else { |
563 | 4.47k | VSB_printf(vep->vsb, "%c", incl); |
564 | 4.47k | VSB_printf(vep->vsb, "%c", 0); |
565 | | /* Look for the last / before a '?' */ |
566 | 4.47k | h = NULL; |
567 | 8.95k | for (q = vep->url; *q && *q != '?'; q++) |
568 | 4.47k | if (*q == '/') |
569 | 4.47k | h = q; |
570 | 4.47k | if (h == NULL) |
571 | 0 | h = q + 1; |
572 | | |
573 | 4.47k | Debug("INCL:: [%.*s]/[%s]\n", |
574 | 4.47k | (int)(h - vep->url), vep->url, p); |
575 | 4.47k | VSB_printf(vep->vsb, "%.*s/", (int)(h - vep->url), vep->url); |
576 | 4.47k | } |
/* From here on l is the length of what remains at p */
577 | 6.73k | l -= (p - VSB_data(vep->include_src)); |
578 | 4.42M | for (q = p; *q != '\0'; ) { |
579 | 4.42M | if (*q == '&') { |
/*
 * R(w, f, r): if the w bytes at q are the entity f, emit the character r
 * instead.  w must be the length of f; the bounds test keeps memcmp()
 * inside the src value.
 */
580 | 47.4k | #define R(w,f,r) \ |
581 | 210k | if (q + w <= p + l && !memcmp(q, f, w)) { \ |
582 | 11.0k | VSB_printf(vep->vsb, "%c", r); \ |
583 | 11.0k | q += w; \ |
584 | 11.0k | continue; \ |
585 | 11.0k | } |
586 | 47.4k | R(6, "&apos;", '\''); |
587 | 44.0k | R(6, "&quot;", '"'); |
588 | 41.3k | R(4, "&lt;", '<'); |
589 | 40.4k | R(4, "&gt;", '>'); |
590 | 36.9k | R(5, "&amp;", '&'); |
591 | 36.4k | } |
592 | 4.40M | VSB_printf(vep->vsb, "%c", *q++); |
593 | 4.40M | } |
594 | 6.73k | #undef R |
595 | 6.73k | VSB_printf(vep->vsb, "%c", 0); |
596 | 6.73k | VSB_destroy(&vep->include_src); |
597 | 6.73k | vep->include_continue = 0; |
598 | 6.73k | } |
599 | | |
600 | | /*--------------------------------------------------------------------- |
601 | | * Lex/Parse object for ESI instructions |
602 | | * |
603 | | * This function is called with the input object piecemeal so do not |
604 | | * assume that we have more than one char available at a time, but |
605 | | * optimize for getting huge chunks. |
606 | | * |
607 | | * NB: At the bottom of this source-file, there is a dot-diagram matching |
608 | | * NB: the state-machine. Please maintain it along with the code. |
609 | | */ |
610 | | |
611 | | void |
612 | | VEP_Parse(struct vep_state *vep, const char *p, size_t l) |
613 | 1.62k | { |
614 | 1.62k | const char *e; |
615 | 1.62k | struct vep_match *vm; |
616 | 1.62k | int i; |
617 | | |
618 | 1.62k | CHECK_OBJ_NOTNULL(vep, VEP_MAGIC); |
619 | 1.62k | assert(l > 0); |
620 | | |
621 | 1.62k | if (vep->startup) { |
622 | | /* |
623 | | * We must force the GZIP header out as a SKIP string, |
624 | | * otherwise an object starting with <esi:include would |
625 | | * have its GZIP header appear after the included object |
626 | | * (e000026.vtc) |
627 | | */ |
628 | 1.62k | vep->ver_p = ""; |
629 | 1.62k | vep->last_mark = SKIP; |
630 | 1.62k | vep_mark_common(vep, vep->ver_p, VERBATIM); |
631 | 1.62k | vep->startup = 0; |
632 | 1.62k | AZ(vep->hack_p); |
633 | 1.62k | vep->hack_p = p; |
634 | 1.62k | } |
635 | | |
636 | 1.62k | vep->ver_p = p; |
637 | | |
638 | 1.62k | e = p + l; |
639 | | |
640 | 673k | while (p < e) { |
641 | 672k | AN(vep->state); |
642 | 672k | Debug("EP %s %d (%.*s) [%.*s]\n", |
643 | 672k | vep->state, |
644 | 672k | vep->remove, |
645 | 672k | vep->tag_i, vep->tag, |
646 | 672k | (e - p) > 10 ? 10 : (int)(e-p), p); |
647 | 672k | assert(p >= vep->ver_p); |
648 | | |
649 | | /****************************************************** |
650 | | * SECTION A |
651 | | */ |
652 | | |
653 | 672k | if (vep->state == VEP_START) { |
654 | 3.42k | if (vep->flags.esi_remove_bom && |
655 | 3.31k | *p == (char)0xeb) { |
656 | 1.83k | vep->match = vep_match_bom; |
657 | 1.83k | vep->state = VEP_MATCH; |
658 | 1.83k | } else |
659 | 1.59k | vep->state = VEP_BOM; |
660 | 668k | } else if (vep->state == VEP_BOM) { |
661 | 1.60k | vep_mark_skip(vep, p); |
662 | 1.60k | if (vep->flags.esi_disable_xml_check) |
663 | 1.50k | vep->state = VEP_NEXTTAG; |
664 | 97 | else |
665 | 97 | vep->state = VEP_TESTXML; |
666 | 667k | } else if (vep->state == VEP_TESTXML) { |
667 | | /* |
668 | | * If the first non-whitespace char is different |
669 | | * from '<' we assume this is not XML. |
670 | | */ |
671 | 323 | while (p < e && vct_islws(*p)) |
672 | 226 | p++; |
673 | 97 | vep_mark_verbatim(vep, p); |
674 | 97 | if (p < e && *p == '<') { |
675 | 12 | p++; |
676 | 12 | vep->state = VEP_STARTTAG; |
677 | 85 | } else if (p < e && *p == (char)0xeb) { |
678 | 1 | VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror, |
679 | 1 | "WARN: No ESI processing, " |
680 | 1 | "first char not '<' but BOM." |
681 | 1 | " (See feature esi_remove_bom)" |
682 | 1 | ); |
683 | 1 | vep->state = VEP_NOTXML; |
684 | 84 | } else if (p < e) { |
685 | 76 | VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror, |
686 | 76 | "WARN: No ESI processing, " |
687 | 76 | "first char not '<'." |
688 | 76 | " (See feature esi_disable_xml_check)" |
689 | 76 | ); |
690 | 76 | vep->state = VEP_NOTXML; |
691 | 76 | } |
692 | 666k | } else if (vep->state == VEP_NOTXML) { |
693 | | /* |
694 | | * This is not recognized as XML, just skip thru |
695 | | * vfp_esi_end() will handle the rest |
696 | | */ |
697 | 77 | p = e; |
698 | 77 | vep_mark_verbatim(vep, p); |
699 | | |
700 | | /****************************************************** |
701 | | * SECTION B |
702 | | */ |
703 | | |
704 | 666k | } else if (vep->state == VEP_NOTMYTAG) { |
705 | 25.0k | if (vep->flags.esi_ignore_other_elements) { |
706 | 24.8k | p++; |
707 | 24.8k | vep->state = VEP_NEXTTAG; |
708 | 24.8k | } else { |
709 | 273 | vep->tag_i = 0; |
710 | 36.2k | while (p < e) { |
711 | 36.2k | if (*p++ == '>') { |
712 | 259 | vep->state = VEP_NEXTTAG; |
713 | 259 | break; |
714 | 259 | } |
715 | 36.2k | } |
716 | 273 | } |
717 | 25.0k | if (p == e && !vep->remove) |
718 | 47 | vep_mark_verbatim(vep, p); |
719 | 641k | } else if (vep->state == VEP_NEXTTAG) { |
720 | | /* |
721 | | * Hunt for start of next tag and keep an eye |
722 | | * out for end of EsiCmt if armed. |
723 | | */ |
724 | 107k | vep->emptytag = 0; |
725 | 107k | vep->attr = NULL; |
726 | 107k | vep->dostuff = NULL; |
727 | 14.6M | while (p < e && *p != '<') { |
728 | 14.5M | if (vep->esicmt_p == NULL) { |
729 | 14.1M | p++; |
730 | 14.1M | continue; |
731 | 14.1M | } |
732 | 364k | if (*p != *vep->esicmt_p) { |
733 | 247k | p++; |
734 | 247k | vep->esicmt_p = vep->esicmt; |
735 | 247k | continue; |
736 | 247k | } |
737 | 116k | if (!vep->remove && vep->esicmt_p == vep->esicmt) |
738 | 38.4k | vep_mark_verbatim(vep, p); |
739 | 116k | p++; |
740 | 116k | if (*++vep->esicmt_p == '\0') { |
741 | 37.7k | vep->esi_found = 1; |
742 | 37.7k | vep->esicmt = NULL; |
743 | 37.7k | vep->esicmt_p = NULL; |
744 | | /* |
745 | | * The end of the esicmt |
746 | | * should not be emitted. |
747 | | * But the stuff before should |
748 | | */ |
749 | 37.7k | vep_mark_skip(vep, p); |
750 | 37.7k | } |
751 | 116k | } |
752 | 107k | if (p < e) { |
753 | 107k | if (!vep->remove) |
754 | 89.7k | vep_mark_verbatim(vep, p); |
755 | 107k | assert(*p == '<'); |
756 | 107k | p++; |
757 | 107k | vep->state = VEP_STARTTAG; |
758 | 107k | } else if (vep->esicmt_p == vep->esicmt && !vep->remove) |
759 | 231 | vep_mark_verbatim(vep, p); |
760 | | |
761 | | /****************************************************** |
762 | | * SECTION C |
763 | | */ |
764 | | |
765 | 533k | } else if (vep->state == VEP_STARTTAG) { |
766 | | /* Start of tag, set up match table */ |
767 | 107k | vep->endtag = 0; |
768 | 107k | vep->match = vep_match_starttag; |
769 | 107k | vep->state = VEP_MATCH; |
770 | 426k | } else if (vep->state == VEP_COMMENT) { |
771 | 1.57k | vep->esicmt_p = vep->esicmt = NULL; |
772 | 1.57k | vep->until_p = vep->until = "-->"; |
773 | 1.57k | vep->until_s = VEP_NEXTTAG; |
774 | 1.57k | vep->state = VEP_UNTIL; |
775 | 424k | } else if (vep->state == VEP_COMMENTESI) { |
776 | 38.6k | if (vep->remove) |
777 | 1.78k | vep_error(vep, |
778 | 1.78k | "ESI 1.0 Nested <!--esi" |
779 | 1.78k | " element in <esi:remove>"); |
780 | 38.6k | vep->esicmt_p = vep->esicmt = "-->"; |
781 | 38.6k | vep->state = VEP_NEXTTAG; |
782 | 38.6k | vep_mark_skip(vep, p); |
783 | 386k | } else if (vep->state == VEP_CDATA) { |
784 | | /* |
785 | | * Easy: just look for the end of CDATA |
786 | | */ |
787 | 3.61k | vep->until_p = vep->until = "]]>"; |
788 | 3.61k | vep->until_s = VEP_NEXTTAG; |
789 | 3.61k | vep->state = VEP_UNTIL; |
790 | 382k | } else if (vep->state == VEP_ESIENDTAG) { |
791 | 2.27k | vep->endtag = 1; |
792 | 2.27k | vep->state = VEP_ESITAG; |
793 | 380k | } else if (vep->state == VEP_ESITAG) { |
794 | 38.1k | vep->in_esi_tag = 1; |
795 | 38.1k | vep->esi_found = 1; |
796 | 38.1k | vep_mark_skip(vep, p); |
797 | 38.1k | vep->match = vep_match_esi; |
798 | 38.1k | vep->state = VEP_MATCH; |
799 | 342k | } else if (vep->state == VEP_ESIINCLUDE) { |
800 | 20.5k | if (vep->remove) { |
801 | 5.44k | vep_error(vep, |
802 | 5.44k | "ESI 1.0 <esi:include> element" |
803 | 5.44k | " nested in <esi:remove>"); |
804 | 5.44k | vep->state = VEP_TAGERROR; |
805 | 15.0k | } else if (vep->endtag) { |
806 | 273 | vep_error(vep, |
807 | 273 | "ESI 1.0 </esi:include> illegal end-tag"); |
808 | 273 | vep->state = VEP_TAGERROR; |
809 | 14.7k | } else { |
810 | 14.7k | vep->dostuff = vep_do_include; |
811 | 14.7k | vep->state = VEP_INTAG; |
812 | 14.7k | vep->attr = vep_match_attr_include; |
813 | 14.7k | } |
814 | 321k | } else if (vep->state == VEP_ESIREMOVE) { |
815 | 8.22k | vep->dostuff = vep_do_remove; |
816 | 8.22k | vep->state = VEP_INTAG; |
817 | 313k | } else if (vep->state == VEP_ESICOMMENT) { |
818 | 3.23k | if (vep->remove) { |
819 | 843 | vep_error(vep, |
820 | 843 | "ESI 1.0 <esi:comment> element" |
821 | 843 | " nested in <esi:remove>"); |
822 | 843 | vep->state = VEP_TAGERROR; |
823 | 2.39k | } else if (vep->endtag) { |
824 | 379 | vep_error(vep, |
825 | 379 | "ESI 1.0 </esi:comment> illegal end-tag"); |
826 | 379 | vep->state = VEP_TAGERROR; |
827 | 2.01k | } else { |
828 | 2.01k | vep->dostuff = vep_do_comment; |
829 | 2.01k | vep->state = VEP_INTAG; |
830 | 2.01k | } |
831 | 310k | } else if (vep->state == VEP_ESIBOGON) { |
832 | 6.13k | vep_error(vep, |
833 | 6.13k | "ESI 1.0 <esi:bogus> element"); |
834 | 6.13k | vep->state = VEP_TAGERROR; |
835 | | |
836 | | /****************************************************** |
837 | | * SECTION D |
838 | | */ |
839 | | |
840 | 304k | } else if (vep->state == VEP_INTAG) { |
841 | 40.6k | vep->tag_i = 0; |
842 | 48.8k | while (p < e && vct_islws(*p) && !vep->emptytag) { |
843 | 8.20k | p++; |
844 | 8.20k | vep->canattr = 1; |
845 | 8.20k | } |
846 | 40.6k | if (p < e && *p == '/' && !vep->emptytag) { |
847 | 4.24k | p++; |
848 | 4.24k | vep->emptytag = 1; |
849 | 4.24k | vep->canattr = 0; |
850 | 4.24k | } |
851 | 40.6k | if (p < e && *p == '>') { |
852 | 16.7k | p++; |
853 | 16.7k | AN(vep->dostuff); |
854 | 16.7k | vep_mark_skip(vep, p); |
855 | 16.7k | vep->dostuff(vep, DO_TAG); |
856 | 16.7k | vep->in_esi_tag = 0; |
857 | 16.7k | vep->state = VEP_NEXTTAG; |
858 | 23.8k | } else if (p < e && vep->emptytag) { |
859 | 516 | vep_error(vep, |
860 | 516 | "XML 1.0 '>' does not follow '/' in tag"); |
861 | 516 | vep->state = VEP_TAGERROR; |
862 | 23.3k | } else if (p < e && vep->canattr && |
863 | 21.8k | vct_isxmlnamestart(*p)) { |
864 | 21.0k | vep->state = VEP_ATTR; |
865 | 21.0k | } else if (p < e) { |
866 | 2.27k | vep_error(vep, |
867 | 2.27k | "XML 1.0 Illegal attribute start char"); |
868 | 2.27k | vep->state = VEP_TAGERROR; |
869 | 2.27k | } |
870 | 263k | } else if (vep->state == VEP_TAGERROR) { |
871 | 3.92M | while (p < e && *p != '>') |
872 | 3.90M | p++; |
873 | 21.1k | if (p < e) { |
874 | 20.9k | p++; |
875 | 20.9k | vep_mark_skip(vep, p); |
876 | 20.9k | vep->in_esi_tag = 0; |
877 | 20.9k | vep->state = VEP_NEXTTAG; |
878 | 20.9k | if (vep->attr_vsb) |
879 | 1.22k | VSB_destroy(&vep->attr_vsb); |
880 | 20.9k | } |
881 | | |
882 | | /****************************************************** |
883 | | * SECTION E |
884 | | */ |
885 | | |
886 | 242k | } else if (vep->state == VEP_ATTR) { |
887 | 21.0k | AZ(vep->attr_delim); |
888 | 21.0k | if (vep->attr == NULL) { |
889 | 4.97k | p++; |
890 | 4.97k | AZ(vep->attr_vsb); |
891 | 4.97k | vep->state = VEP_SKIPATTR; |
892 | 16.0k | } else { |
893 | 16.0k | vep->match = vep->attr; |
894 | 16.0k | vep->state = VEP_MATCH; |
895 | 16.0k | } |
896 | 221k | } else if (vep->state == VEP_SKIPATTR) { |
897 | 29.9k | while (p < e && vct_isxmlname(*p)) |
898 | 21.9k | p++; |
899 | 7.94k | if (p < e && *p == '=') { |
900 | 3.51k | p++; |
901 | 3.51k | vep->state = VEP_ATTRDELIM; |
902 | 4.43k | } else if (p < e && *p == '>') { |
903 | 2.15k | vep->state = VEP_INTAG; |
904 | 2.28k | } else if (p < e && *p == '/') { |
905 | 403 | vep->state = VEP_INTAG; |
906 | 1.88k | } else if (p < e && vct_issp(*p)) { |
907 | 1.27k | vep->state = VEP_INTAG; |
908 | 1.27k | } else if (p < e) { |
909 | 591 | vep_error(vep, |
910 | 591 | "XML 1.0 Illegal attr char"); |
911 | 591 | vep->state = VEP_TAGERROR; |
912 | 591 | } |
913 | 213k | } else if (vep->state == VEP_ATTRGETVAL) { |
914 | 13.0k | AZ(vep->attr_vsb); |
915 | 13.0k | vep->attr_vsb = VSB_new_auto(); |
916 | 13.0k | vep->state = VEP_ATTRDELIM; |
917 | 200k | } else if (vep->state == VEP_ATTRDELIM) { |
918 | 16.5k | AZ(vep->attr_delim); |
919 | 16.5k | if (*p == '"' || *p == '\'') { |
920 | 754 | vep->attr_delim = *p++; |
921 | 754 | vep->state = VEP_ATTRVAL; |
922 | 15.8k | } else if (!vct_issp(*p)) { |
923 | 14.4k | vep->attr_delim = ' '; |
924 | 14.4k | vep->state = VEP_ATTRVAL; |
925 | 14.4k | } else { |
926 | 1.41k | vep_error(vep, |
927 | 1.41k | "XML 1.0 Illegal attribute delimiter"); |
928 | 1.41k | vep->state = VEP_TAGERROR; |
929 | 1.41k | } |
930 | | |
931 | 183k | } else if (vep->state == VEP_ATTRVAL) { |
932 | 17.4M | while (p < e && *p != '>' && *p != vep->attr_delim && |
933 | 17.4M | (vep->attr_delim != ' ' || !vct_issp(*p))) { |
934 | 17.4M | if (vep->attr_vsb != NULL) |
935 | 17.2M | VSB_putc(vep->attr_vsb, *p); |
936 | 17.4M | p++; |
937 | 17.4M | } |
938 | 15.1k | if (p < e && *p == '>') { |
939 | 1.51k | vep_error(vep, |
940 | 1.51k | "XML 1.0 Missing end attribute delimiter"); |
941 | 1.51k | vep->state = VEP_TAGERROR; |
942 | 1.51k | vep->attr_delim = 0; |
943 | 1.51k | if (vep->attr_vsb != NULL) { |
944 | 1.19k | AZ(VSB_finish(vep->attr_vsb)); |
945 | 1.19k | VSB_destroy(&vep->attr_vsb); |
946 | 1.19k | } |
947 | 13.6k | } else if (p < e) { |
948 | 13.5k | vep->attr_delim = 0; |
949 | 13.5k | p++; |
950 | 13.5k | vep->state = VEP_INTAG; |
951 | 13.5k | if (vep->attr_vsb != NULL) { |
952 | 10.6k | AZ(VSB_finish(vep->attr_vsb)); |
953 | 10.6k | AN(vep->dostuff); |
954 | 10.6k | vep->dostuff(vep, DO_ATTR); |
955 | 10.6k | vep->attr_vsb = NULL; |
956 | 10.6k | } |
957 | 13.5k | } |
958 | | |
959 | | /****************************************************** |
960 | | * Utility Section |
961 | | */ |
962 | | |
963 | 168k | } else if (vep->state == VEP_MATCH) { |
964 | | /* |
965 | | * Match against a table |
966 | | */ |
967 | 163k | vm = vep_match(vep, p, e); |
968 | 163k | vep->match_hit = vm; |
969 | 163k | if (vm != NULL) { |
970 | 163k | if (vm->match != NULL) |
971 | 129k | p += vstrlen(vm->match); |
972 | 163k | vep->state = *vm->state; |
973 | 163k | vep->match = NULL; |
974 | 163k | vep->tag_i = 0; |
975 | 163k | } else { |
976 | 40 | assert(p + sizeof(vep->tag) >= e); |
977 | 40 | memcpy(vep->tag, p, e - p); |
978 | 40 | vep->tag_i = e - p; |
979 | 40 | vep->state = VEP_MATCHBUF; |
980 | 40 | p = e; |
981 | 40 | } |
982 | 163k | } else if (vep->state == VEP_MATCHBUF) { |
983 | | /* |
984 | | * Match against a table while split over input |
985 | | * sections. |
986 | | */ |
987 | 0 | AN(vep->match); |
988 | 0 | i = sizeof(vep->tag) - vep->tag_i; |
989 | 0 | if (i > e - p) |
990 | 0 | i = e - p; |
991 | 0 | memcpy(vep->tag + vep->tag_i, p, i); |
992 | 0 | vm = vep_match(vep, vep->tag, |
993 | 0 | vep->tag + vep->tag_i + i); |
994 | 0 | Debug("MB (%.*s) tag_i %d i %d = vm %p match %s\n", |
995 | 0 | vep->tag_i + i, vep->tag, |
996 | 0 | vep->tag_i, |
997 | 0 | i, |
998 | 0 | vm, |
999 | 0 | vm ? vm->match : "(nil)"); |
1000 | |
|
1001 | 0 | if (vm == NULL) { |
1002 | 0 | vep->tag_i += i; |
1003 | 0 | p += i; |
1004 | 0 | assert(p == e); |
1005 | 0 | } else { |
1006 | 0 | vep->match_hit = vm; |
1007 | 0 | vep->state = *vm->state; |
1008 | 0 | if (vm->match != NULL) { |
1009 | 0 | i = vstrlen(vm->match); |
1010 | 0 | if (i > vep->tag_i) |
1011 | 0 | p += i - vep->tag_i; |
1012 | 0 | } |
1013 | 0 | vep->match = NULL; |
1014 | 0 | vep->tag_i = 0; |
1015 | 0 | } |
1016 | 5.19k | } else if (vep->state == VEP_UNTIL) { |
1017 | | /* |
1018 | | * Skip until we see magic string |
1019 | | */ |
1020 | 12.9M | while (p < e) { |
1021 | 12.9M | if (*p++ != *vep->until_p++) { |
1022 | 12.9M | vep->until_p = vep->until; |
1023 | 12.9M | } else if (*vep->until_p == '\0') { |
1024 | 5.12k | vep->state = vep->until_s; |
1025 | 5.12k | break; |
1026 | 5.12k | } |
1027 | 12.9M | } |
1028 | 5.19k | if (p == e && !vep->remove) |
1029 | 69 | vep_mark_verbatim(vep, p); |
1030 | 5.19k | } else { |
1031 | 0 | Debug("*** Unknown state %s\n", vep->state); |
1032 | 0 | WRONG("WRONG ESI PARSER STATE"); |
1033 | 0 | } |
1034 | 672k | } |
1035 | | /* |
1036 | | * We must always mark up the storage we got, try to do so |
1037 | | * in the most efficient way, in particular with respect to |
1038 | | * minimizing and limiting use of pending. |
1039 | | */ |
1040 | 1.62k | if (p == vep->ver_p) |
1041 | 1.01k | ; |
1042 | 611 | else if (vep->in_esi_tag) |
1043 | 354 | vep_mark_skip(vep, p); |
1044 | 257 | else if (vep->remove) |
1045 | 31 | vep_mark_skip(vep, p); |
1046 | 226 | else |
1047 | 226 | vep_mark_pending(vep, p); |
1048 | 1.62k | } |
1049 | | |
1050 | | /*--------------------------------------------------------------------- |
1051 | | */ |
1052 | | |
1053 | | static ssize_t v_matchproto_(vep_callback_t) |
1054 | | vep_default_cb(struct vfp_ctx *vc, void *priv, ssize_t l, enum vgz_flag flg) |
1055 | 466k | { |
1056 | 466k | ssize_t *s; |
1057 | | |
1058 | 466k | CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC); |
1059 | 466k | AN(priv); |
1060 | 466k | s = priv; |
1061 | 466k | *s += l; |
1062 | 466k | (void)flg; |
1063 | 466k | return (*s); |
1064 | 466k | } |
1065 | | |
1066 | | /*--------------------------------------------------------------------- |
1067 | | */ |
1068 | | |
1069 | | struct vep_state * |
1070 | | VEP_Init(struct vfp_ctx *vc, const struct http *req, vep_callback_t *cb, |
1071 | | void *cb_priv, struct vep_flags flags) |
1072 | 1.62k | { |
1073 | 1.62k | struct vep_state *vep; |
1074 | | |
1075 | 1.62k | CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC); |
1076 | 1.62k | CHECK_OBJ_NOTNULL(req, HTTP_MAGIC); |
1077 | 1.62k | vep = WS_Alloc(vc->resp->ws, sizeof *vep); |
1078 | 1.62k | if (vep == NULL) { |
1079 | 0 | VSLb(vc->wrk->vsl, SLT_VCL_Error, |
1080 | 0 | "VEP_Init() workspace overflow"); |
1081 | 0 | return (NULL); |
1082 | 0 | } |
1083 | | |
1084 | 1.62k | INIT_OBJ(vep, VEP_MAGIC); |
1085 | 1.62k | vep->flags = flags; |
1086 | 1.62k | vep->url = req->hd[HTTP_HDR_URL].b; |
1087 | 1.62k | vep->vc = vc; |
1088 | 1.62k | vep->vsb = VSB_new_auto(); |
1089 | 1.62k | AN(vep->vsb); |
1090 | | |
1091 | 1.62k | if (cb != NULL) { |
1092 | 0 | vep->dogzip = 1; |
1093 | | /* XXX */ |
1094 | 0 | VSB_printf(vep->vsb, "%c", VEC_GZ); |
1095 | 0 | vep->cb = cb; |
1096 | 0 | vep->cb_priv = cb_priv; |
1097 | 1.62k | } else { |
1098 | 1.62k | vep->cb = vep_default_cb; |
1099 | 1.62k | vep->cb_priv = &vep->cb_x; |
1100 | 1.62k | } |
1101 | | |
1102 | 1.62k | vep->state = VEP_START; |
1103 | 1.62k | vep->crc = crc32(0L, Z_NULL, 0); |
1104 | 1.62k | vep->crcp = crc32(0L, Z_NULL, 0); |
1105 | | |
1106 | 1.62k | vep->startup = 1; |
1107 | 1.62k | return (vep); |
1108 | 1.62k | } |
1109 | | |
1110 | | /*--------------------------------------------------------------------- |
1111 | | */ |
1112 | | |
1113 | | struct vsb * |
1114 | | VEP_Finish(struct vep_state *vep) |
1115 | 1.62k | { |
1116 | 1.62k | ssize_t l, lcb; |
1117 | | |
1118 | 1.62k | CHECK_OBJ_NOTNULL(vep, VEP_MAGIC); |
1119 | | |
1120 | 1.62k | if (vep->include_src) |
1121 | 43 | VSB_destroy(&vep->include_src); |
1122 | 1.62k | if (vep->attr_vsb) |
1123 | 42 | VSB_destroy(&vep->attr_vsb); |
1124 | | |
1125 | 1.62k | if (vep->state != VEP_START && |
1126 | 1.61k | vep->state != VEP_BOM && |
1127 | 1.61k | vep->state != VEP_TESTXML && |
1128 | 1.60k | vep->state != VEP_NOTXML && |
1129 | 1.52k | vep->state != VEP_NEXTTAG) { |
1130 | 583 | vep_error(vep, "VEP ended inside a tag"); |
1131 | 583 | } |
1132 | | |
1133 | 1.62k | if (vep->o_pending) |
1134 | 226 | vep_mark_common(vep, vep->ver_p, vep->last_mark); |
1135 | 1.62k | if (vep->o_wait > 0) { |
1136 | 1.59k | lcb = vep->cb(vep->vc, vep->cb_priv, 0, VGZ_ALIGN); |
1137 | 1.59k | vep_emit_common(vep, lcb - vep->o_last, vep->last_mark); |
1138 | 1.59k | } |
1139 | | // NB: We don't account for PAD+SUM+LEN in gzipped objects |
1140 | 1.62k | (void)vep->cb(vep->vc, vep->cb_priv, 0, VGZ_FINISH); |
1141 | | |
1142 | 1.62k | AZ(VSB_finish(vep->vsb)); |
1143 | 1.62k | l = VSB_len(vep->vsb); |
1144 | 1.62k | if (vep->esi_found && l > 0) |
1145 | 1.11k | return (vep->vsb); |
1146 | 511 | VSB_destroy(&vep->vsb); |
1147 | | return (NULL); |
1148 | 1.62k | } |
1149 | | |
1150 | | #if 0 |
1151 | | |
1152 | | digraph xml { |
1153 | | rankdir="LR" |
1154 | | size="7,10" |
1155 | | ################################################################# |
1156 | | # SECTION A |
1157 | | # |
1158 | | |
1159 | | START [shape=ellipse] |
1160 | | TESTXML [shape=ellipse] |
1161 | | NOTXML [shape=ellipse] |
1162 | | NEXTTAGa [shape=hexagon, label="NEXTTAG"] |
1163 | | STARTTAGa [shape=hexagon, label="STARTTAG"] |
1164 | | START -> TESTXML |
1165 | | START -> NEXTTAGa [style=dotted, label="syntax:1"] |
1166 | | TESTXML -> TESTXML [label="lws"] |
1167 | | TESTXML -> NOTXML |
1168 | | TESTXML -> STARTTAGa [label="'<'"] |
1169 | | |
1170 | | ################################################################# |
1171 | | # SECTION B |
1172 | | |
1173 | | NOTMYTAG [shape=ellipse] |
1174 | | NEXTTAG [shape=ellipse] |
1175 | | NOTMYTAG -> NEXTTAG [style=dotted, label="syntax:2"] |
1176 | | STARTTAGb [shape=hexagon, label="STARTTAG"] |
1177 | | NOTMYTAG -> NEXTTAG [label="'>'"] |
1178 | | NOTMYTAG -> NOTMYTAG [label="*"] |
1179 | | NEXTTAG -> NEXTTAG [label="'-->'"] |
1180 | | NEXTTAG -> NEXTTAG [label="*"] |
1181 | | NEXTTAG -> STARTTAGb [label="'<'"] |
1182 | | |
1183 | | ################################################################# |
1184 | | # SECTION C |
1185 | | |
1186 | | STARTTAG [shape=ellipse] |
1187 | | COMMENT [shape=ellipse] |
1188 | | CDATA [shape=ellipse] |
1189 | | ESITAG [shape=ellipse] |
1190 | | ESIETAG [shape=ellipse] |
1191 | | ESIINCLUDE [shape=ellipse] |
1192 | | ESIREMOVE [shape=ellipse] |
1193 | | ESICOMMENT [shape=ellipse] |
1194 | | ESIBOGON [shape=ellipse] |
1195 | | INTAGc [shape=hexagon, label="INTAG"] |
1196 | | NOTMYTAGc [shape=hexagon, label="NOTMYTAG"] |
1197 | | NEXTTAGc [shape=hexagon, label="NEXTTAG"] |
1198 | | TAGERRORc [shape=hexagon, label="TAGERROR"] |
1199 | | C1 [shape=circle,label=""] |
1200 | | STARTTAG -> COMMENT [label="'<!--'"] |
1201 | | STARTTAG -> ESITAG [label="'<esi'"] |
1202 | | STARTTAG -> CDATA [label="'<![CDATA['"] |
1203 | | STARTTAG -> NOTMYTAGc [label="'*'"] |
1204 | | COMMENT -> NEXTTAGc [label="'esi'"] |
1205 | | COMMENT -> C1 [label="*"] |
1206 | | C1 -> C1 [label="*"] |
1207 | | C1 -> NEXTTAGc [label="-->"] |
1208 | | CDATA -> CDATA [label="*"] |
1209 | | CDATA -> NEXTTAGc [label="]]>"] |
1210 | | ESITAG -> ESIINCLUDE [label="'include'"] |
1211 | | ESITAG -> ESIREMOVE [label="'remove'"] |
1212 | | ESITAG -> ESICOMMENT [label="'comment'"] |
1213 | | ESITAG -> ESIBOGON [label="*"] |
1214 | | ESICOMMENT -> INTAGc |
1215 | | ESICOMMENT -> TAGERRORc |
1216 | | ESICOMMENT -> TAGERRORc [style=dotted, label="nested\nin\nremove"] |
1217 | | ESIREMOVE -> INTAGc |
1218 | | ESIREMOVE -> TAGERRORc |
1219 | | ESIINCLUDE -> INTAGc |
1220 | | ESIINCLUDE -> TAGERRORc |
1221 | | ESIINCLUDE -> TAGERRORc [style=dotted, label="nested\nin\nremove"] |
1222 | | ESIBOGON -> TAGERRORc |
1223 | | |
1224 | | ################################################################# |
1225 | | # SECTION D |
1226 | | |
1227 | | INTAG [shape=ellipse] |
1228 | | TAGERROR [shape=ellipse] |
1229 | | NEXTTAGd [shape=hexagon, label="NEXTTAG"] |
1230 | | ATTRd [shape=hexagon, label="ATTR"] |
1231 | | D1 [shape=circle, label=""] |
1232 | | D2 [shape=circle, label=""] |
1233 | | INTAG -> D1 [label="lws"] |
1234 | | D1 -> D2 [label="/"] |
1235 | | INTAG -> D2 [label="/"] |
1236 | | INTAG -> NEXTTAGd [label=">"] |
1237 | | D1 -> NEXTTAGd [label=">"] |
1238 | | D2 -> NEXTTAGd [label=">"] |
1239 | | D1 -> ATTRd [label="XMLstartchar"] |
1240 | | D1 -> TAGERROR [label="*"] |
1241 | | D2 -> TAGERROR [label="*"] |
1242 | | TAGERROR -> TAGERROR [label="*"] |
1243 | | TAGERROR -> NEXTTAGd [label="'>'"] |
1244 | | |
1245 | | ################################################################# |
1246 | | # SECTION E |
1247 | | |
1248 | | ATTR [shape=ellipse] |
1249 | | SKIPATTR [shape=ellipse] |
1250 | | ATTRGETVAL [shape=ellipse] |
1251 | | ATTRDELIM [shape=ellipse] |
1252 | | ATTRVAL [shape=ellipse] |
1253 | | TAGERRORe [shape=hexagon, label="TAGERROR"] |
1254 | | INTAGe [shape=hexagon, label="INTAG"] |
1255 | | ATTR -> SKIPATTR [label="*"] |
1256 | | ATTR -> ATTRGETVAL [label="wanted attr"] |
1257 | | SKIPATTR -> SKIPATTR [label="XMLname"] |
1258 | | SKIPATTR -> ATTRDELIM [label="'='"] |
1259 | | SKIPATTR -> TAGERRORe [label="*"] |
1260 | | ATTRGETVAL -> ATTRDELIM |
1261 | | ATTRDELIM -> ATTRVAL [label="\""] |
1262 | | ATTRDELIM -> ATTRVAL [label="\'"] |
1263 | | ATTRDELIM -> ATTRVAL [label="*"] |
1264 | | ATTRDELIM -> TAGERRORe [label="lws"] |
1265 | | ATTRVAL -> TAGERRORe [label="'>'"] |
1266 | | ATTRVAL -> INTAGe [label="delim"] |
1267 | | ATTRVAL -> ATTRVAL [label="*"] |
1268 | | |
1269 | | } |
1270 | | |
1271 | | #endif |