/src/vinyl-cache/bin/vinyld/cache/cache_esi_parse.c
Line | Count | Source |
1 | | /*- |
2 | | * Copyright (c) 2011 Varnish Software AS |
3 | | * All rights reserved. |
4 | | * |
5 | | * Author: Poul-Henning Kamp <phk@phk.freebsd.dk> |
6 | | * |
7 | | * SPDX-License-Identifier: BSD-2-Clause |
8 | | * |
9 | | * Redistribution and use in source and binary forms, with or without |
10 | | * modification, are permitted provided that the following conditions |
11 | | * are met: |
12 | | * 1. Redistributions of source code must retain the above copyright |
13 | | * notice, this list of conditions and the following disclaimer. |
14 | | * 2. Redistributions in binary form must reproduce the above copyright |
15 | | * notice, this list of conditions and the following disclaimer in the |
16 | | * documentation and/or other materials provided with the distribution. |
17 | | * |
18 | | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
19 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
21 | | * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE |
22 | | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
24 | | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
25 | | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
26 | | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
27 | | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
28 | | * SUCH DAMAGE. |
29 | | * |
30 | | * VEP Vinyl Esi Parsing |
31 | | */ |
32 | | |
33 | | #include "config.h" |
34 | | |
35 | | #include "cache_vinyld.h" |
36 | | #include "cache_filter.h" |
37 | | |
38 | | #include "cache_vgz.h" |
39 | | #include "cache_esi.h" |
40 | | #include "vct.h" |
41 | | #include "vend.h" |
42 | | #include "vgz.h" |
43 | | |
44 | | //#define Debug(fmt, ...) printf(fmt, __VA_ARGS__) |
45 | | #define Debug(fmt, ...) /**/ |
46 | | |
47 | | struct vep_state; |
48 | | |
49 | | enum dowhat {DO_ATTR, DO_TAG}; /* what a tag handler is asked to do: process one attribute, or act on the completed tag */ |
50 | | typedef void dostuff_f(struct vep_state *, enum dowhat); /* per-tag handler type; the active one is kept in vep_state.dostuff */ |
51 | | |
52 | | /* One entry of a match table searched by vep_match(). */ struct vep_match { |
53 | | const char *match; /* literal compared against the input; NULL in the table's last (fallback) entry */ |
54 | | const char * const *state; /* address of the VEP_* state constant paired with this entry */ |
55 | | }; |
56 | | |
57 | | enum vep_mark { VERBATIM = 0, SKIP }; /* mode a span of input is marked with; vep_emit_common() emits a verbatim or a skip record accordingly */ |
58 | | |
59 | | /* Complete state of one ESI parse; passed to every function in this file. */ struct vep_state { |
60 | | unsigned magic; /* checked against VEP_MAGIC by VEP_Parse() */ |
61 | | #define VEP_MAGIC 0x55cb9b82 |
62 | | // flags from bereq |
63 | | struct vep_flags flags; |
64 | | |
65 | | struct vsb *vsb; /* output buffer: the vep_emit_*() helpers and vep_do_include() append records here */ |
66 | | |
67 | | const char *url; /* base used by vep_do_include() for relative src= values */ |
68 | | struct vfp_ctx *vc; |
69 | | int dogzip; /* non-zero: verbatim records also carry a CRC length and CRC32 */ |
70 | | vep_callback_t *cb; /* called from vep_mark_common() with (vc, cb_priv, length, VGZ_* flag) */ |
71 | | void *cb_priv; |
72 | | |
73 | | /* Internal Counter for default call-back function */ |
74 | | ssize_t cb_x; |
75 | | |
76 | | /* parser state */ |
77 | | const char *state; /* one of the VEP_* constants, compared by pointer */ |
78 | | unsigned startup; /* cleared by VEP_Parse() after its one-time startup step */ |
79 | | unsigned esi_found; |
80 | | |
81 | | unsigned endtag; /* current tag began with "/esi:" */ |
82 | | unsigned emptytag; /* a '/' was seen inside the current tag */ |
83 | | unsigned canattr; /* whitespace was seen, so an attribute may start */ |
84 | | |
85 | | unsigned remove; /* an <esi:remove> is open (see vep_do_remove) */ |
86 | | |
87 | | ssize_t o_wait; /* bytes marked since the last emitted record */ |
88 | | ssize_t o_pending; /* bytes set aside by vep_mark_pending() */ |
89 | | ssize_t o_total; /* sum of the lengths of all emitted records */ |
90 | | uint32_t crc; /* running CRC32 of the current run */ |
91 | | ssize_t o_crc; /* number of bytes covered by crc */ |
92 | | uint32_t crcp; /* CRC32 of the pending bytes */ |
93 | | ssize_t o_last; /* callback return value at the last emitted record */ |
94 | | |
95 | | const char *hack_p; |
96 | | const char *ver_p; /* input position up to which data has been marked */ |
97 | | |
98 | | const char *until; /* terminator string for VEP_UNTIL, e.g. "-->" or "]]>" */ |
99 | | const char *until_p; |
100 | | const char *until_s; /* state stored alongside until; VEP_Parse() sets it to VEP_NEXTTAG */ |
101 | | |
102 | | int in_esi_tag; |
103 | | |
104 | | const char *esicmt; /* "-->" while inside an <!--esi comment, otherwise NULL */ |
105 | | const char *esicmt_p; /* match cursor into esicmt */ |
106 | | |
107 | | struct vep_match *attr; /* attribute match table of the current tag, or NULL */ |
108 | | struct vsb *attr_vsb; /* buffer for the current attribute value (allocated in state VEP_ATTRGETVAL) */ |
109 | | int attr_delim; /* quote character of the value, or ' ' when unquoted */ |
110 | | |
111 | | struct vep_match *match; /* table searched by vep_match() */ |
112 | | struct vep_match *match_hit; /* matched entry; read by vep_do_include() */ |
113 | | |
114 | | char tag[8]; /* vep_match() asserts no match literal is longer than this */ |
115 | | int tag_i; |
116 | | |
117 | | dostuff_f *dostuff; /* handler of the current ESI tag */ |
118 | | |
119 | | struct vsb *include_src; /* src= value of the current <esi:include>, NULL if none yet */ |
120 | | unsigned include_continue; /* onerror= value was "continue" */ |
121 | | |
122 | | unsigned nm_skip; /* number of vep_mark_skip() calls */ |
123 | | unsigned nm_verbatim; /* number of vep_mark_verbatim() calls */ |
124 | | unsigned nm_pending; /* number of vep_mark_pending() calls */ |
125 | | enum vep_mark last_mark; /* mode passed to the most recent vep_mark_common() */ |
126 | | }; |
127 | | |
128 | | /*---------------------------------------------------------------------
 | | * Parser states.
 | | *
 | | * VEP_Parse() tests vep->state against these constants with ==, i.e. by
 | | * pointer identity; the string contents are labels (the Debug() trace
 | | * prints them with %s). Match tables refer to a state by the address of
 | | * its constant.
 | | */ |
129 | | |
130 | | static const char * const VEP_START = "[Start]"; |
131 | | static const char * const VEP_BOM = "[BOM]"; |
132 | | static const char * const VEP_TESTXML = "[TestXml]"; |
133 | | static const char * const VEP_NOTXML = "[NotXml]"; |
134 | | |
135 | | static const char * const VEP_NEXTTAG = "[NxtTag]"; |
136 | | static const char * const VEP_NOTMYTAG = "[NotMyTag]"; |
137 | | |
138 | | static const char * const VEP_STARTTAG = "[StartTag]"; |
139 | | static const char * const VEP_COMMENTESI = "[CommentESI]"; |
140 | | static const char * const VEP_COMMENT = "[Comment]"; |
141 | | static const char * const VEP_CDATA = "[CDATA]"; |
142 | | static const char * const VEP_ESITAG = "[ESITag]"; |
143 | | static const char * const VEP_ESIENDTAG = "[/ESITag]"; |
144 | | |
145 | | static const char * const VEP_ESIREMOVE = "[ESI:Remove]"; |
146 | | static const char * const VEP_ESIINCLUDE = "[ESI:Include]"; |
147 | | static const char * const VEP_ESICOMMENT = "[ESI:Comment]"; |
148 | | static const char * const VEP_ESIBOGON = "[ESI:Bogon]"; |
149 | | |
150 | | static const char * const VEP_INTAG = "[InTag]"; |
151 | | static const char * const VEP_TAGERROR = "[TagError]"; |
152 | | |
153 | | static const char * const VEP_ATTR = "[Attribute]"; |
154 | | static const char * const VEP_SKIPATTR = "[SkipAttribute]"; |
155 | | static const char * const VEP_ATTRDELIM = "[AttrDelim]"; |
156 | | static const char * const VEP_ATTRGETVAL = "[AttrGetValue]"; |
157 | | static const char * const VEP_ATTRVAL = "[AttrValue]"; |
158 | | |
159 | | static const char * const VEP_UNTIL = "[Until]"; |
160 | | static const char * const VEP_MATCHBUF = "[MatchBuf]"; |
161 | | static const char * const VEP_MATCH = "[Match]"; |
162 | | |
163 | | /*---------------------------------------------------------------------
 | | * Literals recognised directly after a '<' (installed by state
 | | * VEP_STARTTAG). vep_match() takes the first entry that matches, so
 | | * order matters: "!--esi" and "!---->" must precede the shorter "!--".
 | | * The NULL entry is the fallback for any other tag.
 | | */ |
164 | | |
165 | | static struct vep_match vep_match_starttag[] = { |
166 | | { "!--esi", &VEP_COMMENTESI }, |
167 | | { "!---->", &VEP_NEXTTAG }, |
168 | | { "!--", &VEP_COMMENT }, |
169 | | { "/esi:", &VEP_ESIENDTAG }, |
170 | | { "esi:", &VEP_ESITAG }, |
171 | | { "![CDATA[", &VEP_CDATA }, |
172 | | { NULL, &VEP_NOTMYTAG } |
173 | | }; |
174 | | |
175 | | /*---------------------------------------------------------------------
 | | * ESI element names looked up after "esi:" or "/esi:" (installed by
 | | * state VEP_ESITAG). Any other name falls through to VEP_ESIBOGON.
 | | */ |
176 | | |
177 | | static struct vep_match vep_match_esi[] = { |
178 | | { "include", &VEP_ESIINCLUDE }, |
179 | | { "remove", &VEP_ESIREMOVE }, |
180 | | { "comment", &VEP_ESICOMMENT }, |
181 | | { NULL, &VEP_ESIBOGON } |
182 | | }; |
183 | | |
184 | | /*---------------------------------------------------------------------
 | | * Attributes of <esi:include> whose value is collected. The literals
 | | * include the '='; vep_do_include() compares match_hit->match against
 | | * exactly these strings. Any other attribute goes to VEP_SKIPATTR.
 | | */ |
185 | | |
186 | | static struct vep_match vep_match_attr_include[] = { |
187 | | { "src=", &VEP_ATTRGETVAL }, |
188 | | { "onerror=", &VEP_ATTRGETVAL }, |
189 | | { NULL, &VEP_SKIPATTR } |
190 | | }; |
191 | | |
192 | | /*---------------------------------------------------------------------
 | | * Byte-order-mark table, installed by state VEP_START when
 | | * flags.esi_remove_bom is set and the first byte is 0xeb. A full match
 | | * returns to VEP_START; otherwise parsing continues in VEP_BOM.
 | | *
 | | * NOTE(review): the UTF-8 encoding of U+FEFF is EF BB BF, but this
 | | * literal (and the first-byte tests in VEP_Parse) use 0xEB. If a real
 | | * UTF-8 BOM is what should be removed, all three places need to change
 | | * together -- confirm intent before touching.
 | | */ |
193 | | |
194 | | static struct vep_match vep_match_bom[] = { |
195 | | { "\xeb\xbb\xbf", &VEP_START }, |
196 | | { NULL, &VEP_BOM } |
197 | | }; |
198 | | |
199 | | /*-------------------------------------------------------------------- |
200 | | * Report a parsing error |
 | | *
 | | * Increments VSC_C_main->esi_errors and logs an SLT_ESI_xmlerror record
 | | * of the form "ERR: after <vep->o_last> <p>" on the worker's VSL.
 | | * The parser state is not modified (vep is const); callers decide how
 | | * to continue.
201 | | */ |
202 | | |
203 | | static void |
204 | | vep_error(const struct vep_state *vep, const char *p) |
205 | 28.6k | { |
206 | 28.6k | VSC_C_main->esi_errors++; |
207 | 28.6k | VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror, "ERR: after %zd %s", |
208 | 28.6k | vep->o_last, p); |
209 | 28.6k | } |
210 | | |
211 | | /*-------------------------------------------------------------------- |
212 | | * Report a parsing warning |
 | | *
 | | * Increments VSC_C_main->esi_warnings and logs an SLT_ESI_xmlerror
 | | * record of the form "WARN: after <vep->o_last> <p>" on the worker's
 | | * VSL. Same log tag as vep_error(); only the counter and the prefix
 | | * differ.
213 | | */ |
214 | | |
215 | | static void |
216 | | vep_warn(const struct vep_state *vep, const char *p) |
217 | 10.5k | { |
218 | 10.5k | VSC_C_main->esi_warnings++; |
219 | 10.5k | VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror, "WARN: after %zd %s", |
220 | 10.5k | vep->o_last, p); |
221 | 10.5k | } |
222 | | |
223 | | /*--------------------------------------------------------------------- |
224 | | * return match or NULL if more input needed. |
 | | *
 | | * Walks the table in vep->match in order and compares each literal with
 | | * the input [b, e):
 | | * - the first literal that is matched in full is returned;
 | | * - if the input runs out while an entry still matches so far, NULL is
 | | * returned so the caller can retry with more input;
 | | * - if no literal matches, the terminating entry (match == NULL) is
 | | * returned, which carries the fallback state.
 | | * Every literal must fit in vep->tag (asserted).
225 | | */ |
226 | | |
227 | | static struct vep_match * |
228 | | vep_match(const struct vep_state *vep, const char *b, const char *e) |
229 | 208k | { |
230 | 208k | struct vep_match *vm; |
231 | 208k | const char *q, *r; |
232 | | |
233 | 208k | AN(vep->match); |
234 | 593k | for (vm = vep->match; vm->match != NULL; vm++) { |
235 | 551k | assert(vstrlen(vm->match) <= sizeof (vep->tag)); |
236 | 551k | r = b; |
237 | 1.54M | for (q = vm->match; *q != '\0' && r < e; q++, r++) |
238 | 1.37M | if (*q != *r) |
239 | 384k | break; |
240 | 551k | if (*q == '\0') /* whole literal consumed: hit */ |
241 | 166k | break; |
242 | 384k | if (r == e) /* input exhausted mid-literal: undecided */ |
243 | 38 | return (NULL); |
244 | 384k | } |
245 | 208k | return (vm); |
246 | 208k | } |
247 | | |
248 | | /*--------------------------------------------------------------------- |
249 | | * |
 | | * Append a length record to vep->vsb: one marker byte followed by the
 | | * length l in the smallest of three encodings:
 | | * l < 256 marker m8, 1 length byte
 | | * l < 65536 marker m16, 2 length bytes written by vbe16enc()
 | | * otherwise marker m64, 8 length bytes written by vbe64enc()
 | | * l must be > 0. Each branch asserts that the encoded value decodes
 | | * back to l.
250 | | */ |
251 | | |
252 | | static void |
253 | | vep_emit_len(const struct vep_state *vep, ssize_t l, int m8, int m16, int m64) |
254 | 332k | { |
255 | 332k | uint8_t buf[9]; /* marker byte + up to 8 length bytes */ |
256 | | |
257 | 332k | assert(l > 0); |
258 | 332k | if (l < 256) { |
259 | 330k | buf[0] = (uint8_t)m8; |
260 | 330k | buf[1] = (uint8_t)l; |
261 | 330k | assert((ssize_t)buf[1] == l); |
262 | 330k | VSB_bcat(vep->vsb, buf, 2); |
263 | 330k | } else if (l < 65536) { |
264 | 1.84k | buf[0] = (uint8_t)m16; |
265 | 1.84k | vbe16enc(buf + 1, (uint16_t)l); |
266 | 1.84k | assert((ssize_t)vbe16dec(buf + 1) == l); |
267 | 1.84k | VSB_bcat(vep->vsb, buf, 3); |
268 | 1.84k | } else { |
269 | 161 | buf[0] = (uint8_t)m64; |
270 | 161 | vbe64enc(buf + 1, l); |
271 | 161 | assert((ssize_t)vbe64dec(buf + 1) == l); |
272 | 161 | VSB_bcat(vep->vsb, buf, 9); |
273 | 161 | } |
274 | 332k | } |
275 | | |
276 | | /* Emit a skip record for l bytes, using the VEC_S1/VEC_S2/VEC_S8 markers. */ static void |
277 | | vep_emit_skip(const struct vep_state *vep, ssize_t l) |
278 | 177k | { |
279 | | |
280 | 177k | vep_emit_len(vep, l, VEC_S1, VEC_S2, VEC_S8); |
281 | 177k | } |
282 | | |
283 | | /*
 | | * Emit a verbatim record for l bytes (VEC_V1/V2/V8 markers). When
 | | * vep->dogzip is set it is followed by a VEC_C1/C2/C8 length record for
 | | * l_crc and the 4 bytes of vep->crc as written by vbe32enc().
 | | */ static void |
284 | | vep_emit_verbatim(const struct vep_state *vep, ssize_t l, ssize_t l_crc) |
285 | 155k | { |
286 | 155k | uint8_t buf[4]; |
287 | | |
288 | 155k | vep_emit_len(vep, l, VEC_V1, VEC_V2, VEC_V8); |
289 | 155k | if (vep->dogzip) { |
290 | 0 | vep_emit_len(vep, l_crc, VEC_C1, VEC_C2, VEC_C8); |
291 | 0 | vbe32enc(buf, vep->crc); |
292 | 0 | VSB_bcat(vep->vsb, buf, sizeof buf); |
293 | 0 | } |
294 | 155k | } |
295 | | |
296 | | /*
 | | * Emit one record of length l for the given mode and start a new run.
 | | * l == 0 is a no-op (nothing emitted, no state touched). Otherwise a
 | | * skip or verbatim record is written, the running CRC is reset to the
 | | * crc32() initial value, o_crc is zeroed and l is added to o_total.
 | | */ static void |
297 | | vep_emit_common(struct vep_state *vep, ssize_t l, enum vep_mark mark) |
298 | 334k | { |
299 | | |
300 | 334k | assert(l >= 0); |
301 | 334k | if (l == 0) |
302 | 1.61k | return; |
303 | 332k | assert(mark == SKIP || mark == VERBATIM); |
304 | 332k | if (mark == SKIP) |
305 | 177k | vep_emit_skip(vep, l); |
306 | 155k | else |
307 | 155k | vep_emit_verbatim(vep, l, vep->o_crc); |
308 | | |
309 | 332k | vep->crc = crc32(0L, Z_NULL, 0); |
310 | 332k | vep->o_crc = 0; |
311 | 332k | vep->o_total += l; |
312 | 332k | } |
313 | | |
314 | | /*--------------------------------------------------------------------- |
315 | | * |
 | | * Mark the input from vep->ver_p up to p as belonging to mode `mark`.
 | | *
 | | * Steps, in order:
 | | * 1. Nothing to do if the mode is unchanged, p == ver_p and there are
 | | * no pending bytes.
 | | * 2. On a mode change with bytes waiting (or during startup), call the
 | | * callback with length 0 to obtain its current offset and emit a
 | | * record for the previous mode covering the distance since o_last.
 | | * 3. Fold any pending bytes (see vep_mark_pending) into the current
 | | * run: hand them to the callback and merge their CRC and length.
 | | * 4. CRC and count the new span, advance ver_p, remember the mode and
 | | * hand the span length to the callback.
316 | | */ |
317 | | |
318 | | static void |
319 | | vep_mark_common(struct vep_state *vep, const char *p, enum vep_mark mark) |
320 | 441k | { |
321 | 441k | ssize_t l, lcb; |
322 | | |
323 | 441k | assert(mark == SKIP || mark == VERBATIM); |
324 | | |
325 | | /* The NO-OP case, no data, no pending data & no change of mode */ |
326 | 441k | if (vep->last_mark == mark && p == vep->ver_p && vep->o_pending == 0) |
327 | 0 | return; |
328 | | |
329 | | /* |
330 | | * If we changed mode, emit whatever the opposite mode |
331 | | * assembled before the pending bytes. |
332 | | */ |
333 | | |
334 | 441k | if (vep->last_mark != mark && (vep->o_wait > 0 || vep->startup)) { |
335 | 332k | lcb = vep->cb(vep->vc, vep->cb_priv, 0, |
336 | 332k | mark == VERBATIM ? VGZ_RESET : VGZ_ALIGN); |
337 | 332k | vep_emit_common(vep, lcb - vep->o_last, vep->last_mark); |
338 | 332k | vep->o_last = lcb; |
339 | 332k | vep->o_wait = 0; |
340 | 332k | } |
341 | | |
342 | | /* Transfer pending bytes CRC into active mode CRC */ |
343 | 441k | if (vep->o_pending) { |
344 | 220 | (void)vep->cb(vep->vc, vep->cb_priv, vep->o_pending, |
345 | 220 | VGZ_NORMAL); |
346 | 220 | if (vep->o_crc == 0) { /* run is empty: pending CRC becomes the run CRC */ |
347 | 67 | vep->crc = vep->crcp; |
348 | 67 | vep->o_crc = vep->o_pending; |
349 | 153 | } else { |
350 | 153 | vep->crc = crc32_combine(vep->crc, |
351 | 153 | vep->crcp, vep->o_pending); |
352 | 153 | vep->o_crc += vep->o_pending; |
353 | 153 | } |
354 | 220 | vep->crcp = crc32(0L, Z_NULL, 0); |
355 | 220 | vep->o_wait += vep->o_pending; |
356 | 220 | vep->o_pending = 0; |
357 | 220 | } |
358 | | |
359 | | /* * Process this bit of input */ |
360 | 441k | AN(vep->ver_p); |
361 | 441k | l = p - vep->ver_p; |
362 | 441k | assert(l >= 0); |
363 | 441k | vep->crc = crc32(vep->crc, (const void*)vep->ver_p, l); |
364 | 441k | vep->o_crc += l; |
365 | 441k | vep->ver_p = p; |
366 | | |
367 | 441k | vep->o_wait += l; |
368 | 441k | vep->last_mark = mark; |
369 | 441k | (void)vep->cb(vep->vc, vep->cb_priv, l, VGZ_NORMAL); |
370 | 441k | } |
371 | | |
372 | | /* Mark input up to p as VERBATIM and count the call in nm_verbatim. */ static void |
373 | | vep_mark_verbatim(struct vep_state *vep, const char *p) |
374 | 214k | { |
375 | | |
376 | 214k | vep_mark_common(vep, p, VERBATIM); |
377 | 214k | vep->nm_verbatim++; |
378 | 214k | } |
379 | | |
380 | | /* Mark input up to p as SKIP and count the call in nm_skip. */ static void |
381 | | vep_mark_skip(struct vep_state *vep, const char *p) |
382 | 225k | { |
383 | | |
384 | 225k | vep_mark_common(vep, p, SKIP); |
385 | 225k | vep->nm_skip++; |
386 | 225k | } |
387 | | |
388 | | /*
 | | * Set the input from vep->ver_p up to p aside as "pending": its CRC is
 | | * accumulated in crcp and its length in o_pending, without assigning
 | | * it to a mode. The next vep_mark_common() call folds it into the run
 | | * it marks. Requires p > ver_p (asserted).
 | | */ static void |
389 | | vep_mark_pending(struct vep_state *vep, const char *p) |
390 | 220 | { |
391 | 220 | ssize_t l; |
392 | | |
393 | 220 | AN(vep->ver_p); |
394 | 220 | l = p - vep->ver_p; |
395 | 220 | assert(l > 0); |
396 | 220 | vep->crcp = crc32(vep->crcp, (const void *)vep->ver_p, l); |
397 | 220 | vep->ver_p = p; |
398 | | |
399 | 220 | vep->o_pending += l; |
400 | 220 | vep->nm_pending++; |
401 | 220 | } |
402 | | |
403 | | /*--------------------------------------------------------------------- |
 | | * Tag handler for <esi:comment>. Only ever called with DO_TAG
 | | * (asserted); reports an error when the tag was not written in the
 | | * empty-element form, i.e. no '/' was seen before the '>'.
404 | | */ |
405 | | |
406 | | static void v_matchproto_() |
407 | | vep_do_comment(struct vep_state *vep, enum dowhat what) |
408 | 790 | { |
409 | 790 | Debug("DO_COMMENT(%d)\n", what); |
410 | 790 | assert(what == DO_TAG); |
411 | 790 | if (!vep->emptytag) |
412 | 479 | vep_error(vep, "ESI 1.0 <esi:comment> needs final '/'"); |
413 | 790 | } |
414 | | |
415 | | /*--------------------------------------------------------------------- |
 | | * Tag handler for <esi:remove> and </esi:remove>. Only ever called
 | | * with DO_TAG (asserted).
 | | *
 | | * Errors are reported, and vep->remove is left unchanged, for:
 | | * - the empty-element form <esi:remove/>,
 | | * - an opening tag while a remove is already open,
 | | * - a closing tag while no remove is open.
 | | * Otherwise vep->remove becomes 1 for an opening tag and 0 for a
 | | * closing tag.
416 | | */ |
417 | | |
418 | | static void v_matchproto_() |
419 | | vep_do_remove(struct vep_state *vep, enum dowhat what) |
420 | 5.32k | { |
421 | 5.32k | Debug("DO_REMOVE(%d, end %d empty %d remove %d)\n", |
422 | 5.32k | what, vep->endtag, vep->emptytag, vep->remove); |
423 | 5.32k | assert(what == DO_TAG); |
424 | 5.32k | if (vep->emptytag) |
425 | 1.97k | vep_error(vep, "ESI 1.0 <esi:remove/> not legal"); |
426 | 3.35k | else if (vep->remove && !vep->endtag) |
427 | 912 | vep_error(vep, "ESI 1.0 <esi:remove> already open"); |
428 | 2.44k | else if (!vep->remove && vep->endtag) |
429 | 321 | vep_error(vep, "ESI 1.0 <esi:remove> not open"); |
430 | 2.12k | else |
431 | 2.12k | vep->remove = !vep->endtag; |
432 | 5.32k | } |
433 | | |
434 | | /*--------------------------------------------------------------------- |
 | | * Handle a completed src= attribute of <esi:include>.
 | | *
 | | * On entry the attribute value is in vep->attr_vsb. On success the
 | | * buffer is handed over to vep->include_src and attr_vsb is set to
 | | * NULL. Two conditions are reported as errors and put the parser into
 | | * VEP_TAGERROR, destroying the buffers involved:
 | | * - a src= was already recorded for this tag,
 | | * - the value contains a character for which vct_islws() is true.
 | | *
 | | * In the second case include_src is known to be NULL (the first check
 | | * returned otherwise), so only attr_vsb needs to be destroyed.
435 | | */ |
436 | | |
437 | | static void |
438 | | include_attr_src(struct vep_state *vep) |
439 | 11.8k | { |
440 | 11.8k | const char *p; |
441 | | |
442 | 11.8k | if (vep->include_src != NULL) { |
443 | 1.02k | vep_error(vep, |
444 | 1.02k | "ESI 1.0 <esi:include> " |
445 | 1.02k | "has multiple src= attributes"); |
446 | 1.02k | vep->state = VEP_TAGERROR; |
447 | 1.02k | VSB_destroy(&vep->attr_vsb); |
448 | 1.02k | VSB_destroy(&vep->include_src); |
449 | 1.02k | return; |
450 | 1.02k | } |
451 | 11.8M | for (p = VSB_data(vep->attr_vsb); *p != '\0'; p++) |
452 | 11.8M | if (vct_islws(*p)) |
453 | 536 | break; |
454 | 10.8k | if (*p != '\0') { /* loop stopped early: whitespace found */ |
455 | 536 | vep_error(vep, |
456 | 536 | "ESI 1.0 <esi:include> " |
457 | 536 | "has whitespace in src= attribute"); |
458 | 536 | vep->state = VEP_TAGERROR; |
459 | 536 | VSB_destroy(&vep->attr_vsb); |
462 | 536 | return; |
463 | 536 | } |
464 | 10.2k | vep->include_src = vep->attr_vsb; /* ownership moves to include_src */ |
465 | 10.2k | vep->attr_vsb = NULL; |
466 | 10.2k | } |
467 | | |
468 | | /*
 | | * Handle a completed onerror= attribute of <esi:include>:
 | | * include_continue is set to 1 when the value in vep->attr_vsb is
 | | * exactly "continue" and to 0 for any other value; the value buffer is
 | | * destroyed afterwards.
 | | */ static void |
469 | | include_attr_onerror(struct vep_state *vep) |
470 | 0 | { |
471 |

472 | 0 | vep->include_continue = !vstrcmp("continue", VSB_data(vep->attr_vsb)); |
473 | 0 | VSB_destroy(&vep->attr_vsb); |
474 | 0 | } |
475 | | |
476 | | /*
 | | * Tag handler for <esi:include>.
 | | *
 | | * DO_ATTR: dispatches on the matched attribute literal to
 | | * include_attr_src() or include_attr_onerror(); anything else trips
 | | * WRONG().
 | | *
 | | * DO_TAG: warns when the tag lacks the final '/', reports an error when
 | | * no src= was recorded, and otherwise appends an include record to
 | | * vep->vsb:
 | | * - one marker byte: VEC_IC if include_continue is set, else VEC_IA;
 | | * - "Host: <host>" plus a NUL byte for http:// (and accepted https://)
 | | * sources, or a lone NUL byte otherwise;
 | | * - the path, NUL terminated, with the five predefined XML entities
 | | * decoded. A src that does not start with '/' is prefixed with
 | | * vep->url up to its last '/' before any '?'.
 | | * An absolute URL without a '/' after the host, and an https:// URL
 | | * when flags.esi_ignore_https is not set, put the parser into
 | | * VEP_TAGERROR instead.
 | | *
 | | * include_src is destroyed on every path that had one;
 | | * include_continue is reset only on the success path.
 | | * NOTE(review): within the visible code include_continue is not cleared
 | | * on the early-return paths, so an onerror="continue" from a rejected
 | | * tag may carry over to the next include -- confirm whether it is
 | | * reset elsewhere.
 | | */ static void v_matchproto_() |
477 | | vep_do_include(struct vep_state *vep, enum dowhat what) |
478 | 21.8k | { |
479 | 21.8k | const char *p, *q, *h; |
480 | 21.8k | ssize_t l; |
481 | 21.8k | char incl; |
482 | | |
483 | 21.8k | Debug("DO_INCLUDE(%d)\n", what); |
484 | 21.8k | if (what == DO_ATTR) { |
485 | 11.8k | Debug("ATTR (%s) (%s)\n", vep->match_hit->match, |
486 | 11.8k | VSB_data(vep->attr_vsb)); |
487 | 11.8k | if (!vstrcmp("src=", vep->match_hit->match)) { |
488 | 11.8k | include_attr_src(vep); |
489 | 11.8k | return; |
490 | 11.8k | } |
491 | 0 | if (!vstrcmp("onerror=", vep->match_hit->match)) { |
492 | 0 | include_attr_onerror(vep); |
493 | 0 | return; |
494 | 0 | } |
495 | 0 | WRONG("Unhandled <esi:include> attribute"); |
496 | 0 | } |
497 | 10.0k | assert(what == DO_TAG); |
498 | 10.0k | if (!vep->emptytag) |
499 | 9.56k | vep_warn(vep, "ESI 1.0 <esi:include> lacks final '/'"); |
500 | 10.0k | if (vep->include_src == NULL) { |
501 | 812 | vep_error(vep, "ESI 1.0 <esi:include> lacks src attr"); |
502 | 812 | return; |
503 | 812 | } |
504 | | |
505 | | /* |
506 | | * Strictly speaking, we ought to spit out any piled up skip before |
507 | | * emitting the VEC for the include, but objectively that makes no |
508 | | * difference and robs us of a chance to collapse another skip into |
509 | | * this one so we don't do that. |
510 | | * However, we cannot tolerate any verbatim stuff piling up. |
511 | | * The mark_skip() before calling dostuff should have taken |
512 | | * care of that. Make sure. |
513 | | */ |
514 | 9.22k | assert(vep->o_wait == 0 || vep->last_mark == SKIP); |
515 | | /* XXX: what if it contains NUL bytes ?? */ |
516 | 9.22k | p = VSB_data(vep->include_src); |
517 | 9.22k | l = VSB_len(vep->include_src); |
518 | 9.22k | h = 0; |
519 | | |
520 | 9.22k | incl = vep->include_continue ? VEC_IC : VEC_IA; |
521 | | |
522 | 9.22k | if (l > 7 && !memcmp(p, "http://", 7)) { |
523 | 3.47k | h = p + 7; /* start of host */ |
524 | 3.47k | p = strchr(h, '/'); /* start of path */ |
525 | 3.47k | if (p == NULL) { |
526 | 1.55k | vep_error(vep, |
527 | 1.55k | "ESI 1.0 <esi:include> invalid src= URL"); |
528 | 1.55k | vep->state = VEP_TAGERROR; |
529 | 1.55k | AZ(vep->attr_vsb); |
530 | 1.55k | VSB_destroy(&vep->include_src); |
531 | 1.55k | return; |
532 | 1.55k | } |
533 | 1.92k | Debug("HOST <%.*s> PATH <%s>\n", (int)(p-h),h, p); |
534 | 1.92k | VSB_printf(vep->vsb, "%c", incl); |
535 | 1.92k | VSB_printf(vep->vsb, "Host: %.*s%c", (int)(p-h), h, 0); |
536 | 5.75k | } else if (l > 8 && !memcmp(p, "https://", 8)) { |
537 | 965 | if (!vep->flags.esi_ignore_https) { |
538 | 274 | vep_warn(vep, |
539 | 274 | "ESI 1.0 <esi:include> with https:// ignored"); |
540 | 274 | vep->state = VEP_TAGERROR; |
541 | 274 | AZ(vep->attr_vsb); |
542 | 274 | VSB_destroy(&vep->include_src); |
543 | 274 | return; |
544 | 274 | } |
545 | 691 | vep_warn(vep, |
546 | 691 | "ESI 1.0 <esi:include> https:// treated as http://"); |
547 | 691 | h = p + 8; |
548 | 691 | p = strchr(h, '/'); |
549 | 691 | if (p == NULL) { |
550 | 341 | vep_error(vep, |
551 | 341 | "ESI 1.0 <esi:include> invalid src= URL"); |
552 | 341 | vep->state = VEP_TAGERROR; |
553 | 341 | AZ(vep->attr_vsb); |
554 | 341 | VSB_destroy(&vep->include_src); |
555 | 341 | return; |
556 | 341 | } |
557 | 350 | VSB_printf(vep->vsb, "%c", incl); |
558 | 350 | VSB_printf(vep->vsb, "Host: %.*s%c", (int)(p-h), h, 0); |
559 | 4.78k | } else if (*p == '/') { |
560 | 851 | VSB_printf(vep->vsb, "%c", incl); |
561 | 851 | VSB_printf(vep->vsb, "%c", 0); /* empty host field */ |
562 | 3.93k | } else { |
563 | 3.93k | VSB_printf(vep->vsb, "%c", incl); |
564 | 3.93k | VSB_printf(vep->vsb, "%c", 0); /* empty host field */ |
565 | | /* Look for the last / before a '?' */ |
566 | 3.93k | h = NULL; |
567 | 7.87k | for (q = vep->url; *q && *q != '?'; q++) |
568 | 3.93k | if (*q == '/') |
569 | 3.93k | h = q; |
570 | 3.93k | if (h == NULL) |
571 | 0 | h = q + 1; |
572 | | |
573 | 3.93k | Debug("INCL:: [%.*s]/[%s]\n", |
574 | 3.93k | (int)(h - vep->url), vep->url, p); |
575 | 3.93k | VSB_printf(vep->vsb, "%.*s/", (int)(h - vep->url), vep->url); |
576 | 3.93k | } |
577 | 7.05k | l -= (p - VSB_data(vep->include_src)); /* l is now the length of the path part */ |
578 | 4.15M | for (q = p; *q != '\0'; ) { |
579 | 4.15M | if (*q == '&') { |
580 | 33.0k | #define R(w,f,r) \ |
581 | 141k | if (q + w <= p + l && !memcmp(q, f, w)) { \ |
582 | 11.0k | VSB_printf(vep->vsb, "%c", r); \ |
583 | 11.0k | q += w; \ |
584 | 11.0k | continue; \ |
585 | 11.0k | } |
586 | 33.0k | R(6, "&apos;", '\''); |
587 | 29.3k | R(6, "&quot;", '"'); |
588 | 28.3k | R(4, "&lt;", '<'); |
589 | 27.9k | R(4, "&gt;", '>'); |
590 | 22.5k | R(5, "&amp;", '&'); |
591 | 21.9k | } |
592 | 4.14M | VSB_printf(vep->vsb, "%c", *q++); /* any other byte, including an unrecognised '&', is copied as is */ |
593 | 4.14M | } |
594 | 7.05k | #undef R |
595 | 7.05k | VSB_printf(vep->vsb, "%c", 0); |
596 | 7.05k | VSB_destroy(&vep->include_src); |
597 | 7.05k | vep->include_continue = 0; |
598 | 7.05k | } |
599 | | |
600 | | /*--------------------------------------------------------------------- |
601 | | * Lex/Parse object for ESI instructions |
602 | | * |
603 | | * This function is called with the input object piecemeal so do not |
604 | | * assume that we have more than one char available at a time, but |
605 | | * optimize for getting huge chunks. |
606 | | * |
607 | | * NB: At the bottom of this source-file, there is a dot-diagram matching |
608 | | * NB: the state-machine. Please maintain it along with the code. |
609 | | */ |
610 | | |
611 | | void |
612 | | VEP_Parse(struct vep_state *vep, const char *p, size_t l) |
613 | 1.61k | { |
614 | 1.61k | const char *e; |
615 | 1.61k | struct vep_match *vm; |
616 | 1.61k | int i; |
617 | | |
618 | 1.61k | CHECK_OBJ_NOTNULL(vep, VEP_MAGIC); |
619 | 1.61k | assert(l > 0); |
620 | | |
621 | 1.61k | if (vep->startup) { |
622 | | /* |
623 | | * We must force the GZIP header out as a SKIP string, |
624 | | * otherwise an object starting with <esi:include would |
625 | | * have its GZIP header appear after the included object |
626 | | * (e000026.vtc) |
627 | | */ |
628 | 1.61k | vep->ver_p = ""; |
629 | 1.61k | vep->last_mark = SKIP; |
630 | 1.61k | vep_mark_common(vep, vep->ver_p, VERBATIM); |
631 | 1.61k | vep->startup = 0; |
632 | 1.61k | AZ(vep->hack_p); |
633 | 1.61k | vep->hack_p = p; |
634 | 1.61k | } |
635 | | |
636 | 1.61k | vep->ver_p = p; |
637 | | |
638 | 1.61k | e = p + l; |
639 | | |
640 | 864k | while (p < e) { |
641 | 862k | AN(vep->state); |
642 | 862k | Debug("EP %s %d (%.*s) [%.*s]\n", |
643 | 862k | vep->state, |
644 | 862k | vep->remove, |
645 | 862k | vep->tag_i, vep->tag, |
646 | 862k | (e - p) > 10 ? 10 : (int)(e-p), p); |
647 | 862k | assert(p >= vep->ver_p); |
648 | | |
649 | | /****************************************************** |
650 | | * SECTION A |
651 | | */ |
652 | | |
653 | 862k | if (vep->state == VEP_START) { |
654 | 3.37k | if (vep->flags.esi_remove_bom && |
655 | 3.26k | *p == (char)0xeb) { |
656 | 1.79k | vep->match = vep_match_bom; |
657 | 1.79k | vep->state = VEP_MATCH; |
658 | 1.79k | } else |
659 | 1.58k | vep->state = VEP_BOM; |
660 | 859k | } else if (vep->state == VEP_BOM) { |
661 | 1.59k | vep_mark_skip(vep, p); |
662 | 1.59k | if (vep->flags.esi_disable_xml_check) |
663 | 1.50k | vep->state = VEP_NEXTTAG; |
664 | 94 | else |
665 | 94 | vep->state = VEP_TESTXML; |
666 | 857k | } else if (vep->state == VEP_TESTXML) { |
667 | | /* |
668 | | * If the first non-whitespace char is different |
669 | | * from '<' we assume this is not XML. |
670 | | */ |
671 | 487 | while (p < e && vct_islws(*p)) |
672 | 393 | p++; |
673 | 94 | vep_mark_verbatim(vep, p); |
674 | 94 | if (p < e && *p == '<') { |
675 | 12 | p++; |
676 | 12 | vep->state = VEP_STARTTAG; |
677 | 82 | } else if (p < e && *p == (char)0xeb) { |
678 | 2 | VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror, |
679 | 2 | "WARN: No ESI processing, " |
680 | 2 | "first char not '<' but BOM." |
681 | 2 | " (See feature esi_remove_bom)" |
682 | 2 | ); |
683 | 2 | vep->state = VEP_NOTXML; |
684 | 80 | } else if (p < e) { |
685 | 72 | VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror, |
686 | 72 | "WARN: No ESI processing, " |
687 | 72 | "first char not '<'." |
688 | 72 | " (See feature esi_disable_xml_check)" |
689 | 72 | ); |
690 | 72 | vep->state = VEP_NOTXML; |
691 | 72 | } |
692 | 857k | } else if (vep->state == VEP_NOTXML) { |
693 | | /* |
694 | | * This is not recognized as XML, just skip thru |
695 | | * vfp_esi_end() will handle the rest |
696 | | */ |
697 | 74 | p = e; |
698 | 74 | vep_mark_verbatim(vep, p); |
699 | | |
700 | | /****************************************************** |
701 | | * SECTION B |
702 | | */ |
703 | | |
704 | 857k | } else if (vep->state == VEP_NOTMYTAG) { |
705 | 29.6k | if (vep->flags.esi_ignore_other_elements) { |
706 | 29.2k | p++; |
707 | 29.2k | vep->state = VEP_NEXTTAG; |
708 | 29.2k | } else { |
709 | 386 | vep->tag_i = 0; |
710 | 11.3k | while (p < e) { |
711 | 11.3k | if (*p++ == '>') { |
712 | 366 | vep->state = VEP_NEXTTAG; |
713 | 366 | break; |
714 | 366 | } |
715 | 11.3k | } |
716 | 386 | } |
717 | 29.6k | if (p == e && !vep->remove) |
718 | 55 | vep_mark_verbatim(vep, p); |
719 | 827k | } else if (vep->state == VEP_NEXTTAG) { |
720 | | /* |
721 | | * Hunt for start of next tag and keep an eye |
722 | | * out for end of EsiCmt if armed. |
723 | | */ |
724 | 149k | vep->emptytag = 0; |
725 | 149k | vep->attr = NULL; |
726 | 149k | vep->dostuff = NULL; |
727 | 13.1M | while (p < e && *p != '<') { |
728 | 12.9M | if (vep->esicmt_p == NULL) { |
729 | 12.4M | p++; |
730 | 12.4M | continue; |
731 | 12.4M | } |
732 | 569k | if (*p != *vep->esicmt_p) { |
733 | 340k | p++; |
734 | 340k | vep->esicmt_p = vep->esicmt; |
735 | 340k | continue; |
736 | 340k | } |
737 | 229k | if (!vep->remove && vep->esicmt_p == vep->esicmt) |
738 | 77.2k | vep_mark_verbatim(vep, p); |
739 | 229k | p++; |
740 | 229k | if (*++vep->esicmt_p == '\0') { |
741 | 74.6k | vep->esi_found = 1; |
742 | 74.6k | vep->esicmt = NULL; |
743 | 74.6k | vep->esicmt_p = NULL; |
744 | | /* |
745 | | * The end of the esicmt |
746 | | * should not be emitted. |
747 | | * But the stuff before should |
748 | | */ |
749 | 74.6k | vep_mark_skip(vep, p); |
750 | 74.6k | } |
751 | 229k | } |
752 | 149k | if (p < e) { |
753 | 148k | if (!vep->remove) |
754 | 136k | vep_mark_verbatim(vep, p); |
755 | 148k | assert(*p == '<'); |
756 | 148k | p++; |
757 | 148k | vep->state = VEP_STARTTAG; |
758 | 148k | } else if (vep->esicmt_p == vep->esicmt && !vep->remove) |
759 | 231 | vep_mark_verbatim(vep, p); |
760 | | |
761 | | /****************************************************** |
762 | | * SECTION C |
763 | | */ |
764 | | |
765 | 678k | } else if (vep->state == VEP_STARTTAG) { |
766 | | /* Start of tag, set up match table */ |
767 | 148k | vep->endtag = 0; |
768 | 148k | vep->match = vep_match_starttag; |
769 | 148k | vep->state = VEP_MATCH; |
770 | 529k | } else if (vep->state == VEP_COMMENT) { |
771 | 2.32k | vep->esicmt_p = vep->esicmt = NULL; |
772 | 2.32k | vep->until_p = vep->until = "-->"; |
773 | 2.32k | vep->until_s = VEP_NEXTTAG; |
774 | 2.32k | vep->state = VEP_UNTIL; |
775 | 527k | } else if (vep->state == VEP_COMMENTESI) { |
776 | 75.9k | if (vep->remove) |
777 | 1.23k | vep_error(vep, |
778 | 1.23k | "ESI 1.0 Nested <!--esi" |
779 | 1.23k | " element in <esi:remove>"); |
780 | 75.9k | vep->esicmt_p = vep->esicmt = "-->"; |
781 | 75.9k | vep->state = VEP_NEXTTAG; |
782 | 75.9k | vep_mark_skip(vep, p); |
783 | 451k | } else if (vep->state == VEP_CDATA) { |
784 | | /* |
785 | | * Easy: just look for the end of CDATA |
786 | | */ |
787 | 3.59k | vep->until_p = vep->until = "]]>"; |
788 | 3.59k | vep->until_s = VEP_NEXTTAG; |
789 | 3.59k | vep->state = VEP_UNTIL; |
790 | 447k | } else if (vep->state == VEP_ESIENDTAG) { |
791 | 3.00k | vep->endtag = 1; |
792 | 3.00k | vep->state = VEP_ESITAG; |
793 | 444k | } else if (vep->state == VEP_ESITAG) { |
794 | 36.7k | vep->in_esi_tag = 1; |
795 | 36.7k | vep->esi_found = 1; |
796 | 36.7k | vep_mark_skip(vep, p); |
797 | 36.7k | vep->match = vep_match_esi; |
798 | 36.7k | vep->state = VEP_MATCH; |
799 | 408k | } else if (vep->state == VEP_ESIINCLUDE) { |
800 | 22.6k | if (vep->remove) { |
801 | 2.97k | vep_error(vep, |
802 | 2.97k | "ESI 1.0 <esi:include> element" |
803 | 2.97k | " nested in <esi:remove>"); |
804 | 2.97k | vep->state = VEP_TAGERROR; |
805 | 19.7k | } else if (vep->endtag) { |
806 | 206 | vep_error(vep, |
807 | 206 | "ESI 1.0 </esi:include> illegal end-tag"); |
808 | 206 | vep->state = VEP_TAGERROR; |
809 | 19.5k | } else { |
810 | 19.5k | vep->dostuff = vep_do_include; |
811 | 19.5k | vep->state = VEP_INTAG; |
812 | 19.5k | vep->attr = vep_match_attr_include; |
813 | 19.5k | } |
814 | 385k | } else if (vep->state == VEP_ESIREMOVE) { |
815 | 6.84k | vep->dostuff = vep_do_remove; |
816 | 6.84k | vep->state = VEP_INTAG; |
817 | 378k | } else if (vep->state == VEP_ESICOMMENT) { |
818 | 2.15k | if (vep->remove) { |
819 | 742 | vep_error(vep, |
820 | 742 | "ESI 1.0 <esi:comment> element" |
821 | 742 | " nested in <esi:remove>"); |
822 | 742 | vep->state = VEP_TAGERROR; |
823 | 1.40k | } else if (vep->endtag) { |
824 | 523 | vep_error(vep, |
825 | 523 | "ESI 1.0 </esi:comment> illegal end-tag"); |
826 | 523 | vep->state = VEP_TAGERROR; |
827 | 886 | } else { |
828 | 886 | vep->dostuff = vep_do_comment; |
829 | 886 | vep->state = VEP_INTAG; |
830 | 886 | } |
831 | 376k | } else if (vep->state == VEP_ESIBOGON) { |
832 | 5.08k | vep_error(vep, |
833 | 5.08k | "ESI 1.0 <esi:bogus> element"); |
834 | 5.08k | vep->state = VEP_TAGERROR; |
835 | | |
836 | | /****************************************************** |
837 | | * SECTION D |
838 | | */ |
839 | | |
840 | 371k | } else if (vep->state == VEP_INTAG) { |
841 | 48.1k | vep->tag_i = 0; |
842 | 62.1k | while (p < e && vct_islws(*p) && !vep->emptytag) { |
843 | 13.9k | p++; |
844 | 13.9k | vep->canattr = 1; |
845 | 13.9k | } |
846 | 48.1k | if (p < e && *p == '/' && !vep->emptytag) { |
847 | 6.12k | p++; |
848 | 6.12k | vep->emptytag = 1; |
849 | 6.12k | vep->canattr = 0; |
850 | 6.12k | } |
851 | 48.1k | if (p < e && *p == '>') { |
852 | 16.1k | p++; |
853 | 16.1k | AN(vep->dostuff); |
854 | 16.1k | vep_mark_skip(vep, p); |
855 | 16.1k | vep->dostuff(vep, DO_TAG); |
856 | 16.1k | vep->in_esi_tag = 0; |
857 | 16.1k | vep->state = VEP_NEXTTAG; |
858 | 31.9k | } else if (p < e && vep->emptytag) { |
859 | 3.35k | vep_error(vep, |
860 | 3.35k | "XML 1.0 '>' does not follow '/' in tag"); |
861 | 3.35k | vep->state = VEP_TAGERROR; |
862 | 28.6k | } else if (p < e && vep->canattr && |
863 | 27.2k | vct_isxmlnamestart(*p)) { |
864 | 26.2k | vep->state = VEP_ATTR; |
865 | 26.2k | } else if (p < e) { |
866 | 2.31k | vep_error(vep, |
867 | 2.31k | "XML 1.0 Illegal attribute start char"); |
868 | 2.31k | vep->state = VEP_TAGERROR; |
869 | 2.31k | } |
870 | 323k | } else if (vep->state == VEP_TAGERROR) { |
871 | 3.63M | while (p < e && *p != '>') |
872 | 3.61M | p++; |
873 | 20.3k | if (p < e) { |
874 | 20.2k | p++; |
875 | 20.2k | vep_mark_skip(vep, p); |
876 | 20.2k | vep->in_esi_tag = 0; |
877 | 20.2k | vep->state = VEP_NEXTTAG; |
878 | 20.2k | if (vep->attr_vsb) |
879 | 1.91k | VSB_destroy(&vep->attr_vsb); |
880 | 20.2k | } |
881 | | |
882 | | /****************************************************** |
883 | | * SECTION E |
884 | | */ |
885 | | |
886 | 302k | } else if (vep->state == VEP_ATTR) { |
887 | 26.2k | AZ(vep->attr_delim); |
888 | 26.2k | if (vep->attr == NULL) { |
889 | 5.12k | p++; |
890 | 5.12k | AZ(vep->attr_vsb); |
891 | 5.12k | vep->state = VEP_SKIPATTR; |
892 | 21.1k | } else { |
893 | 21.1k | vep->match = vep->attr; |
894 | 21.1k | vep->state = VEP_MATCH; |
895 | 21.1k | } |
896 | 276k | } else if (vep->state == VEP_SKIPATTR) { |
897 | 32.9k | while (p < e && vct_isxmlname(*p)) |
898 | 21.1k | p++; |
899 | 11.8k | if (p < e && *p == '=') { |
900 | 4.61k | p++; |
901 | 4.61k | vep->state = VEP_ATTRDELIM; |
902 | 7.20k | } else if (p < e && *p == '>') { |
903 | 1.63k | vep->state = VEP_INTAG; |
904 | 5.56k | } else if (p < e && *p == '/') { |
905 | 3.56k | vep->state = VEP_INTAG; |
906 | 3.56k | } else if (p < e && vct_issp(*p)) { |
907 | 1.47k | vep->state = VEP_INTAG; |
908 | 1.47k | } else if (p < e) { |
909 | 513 | vep_error(vep, |
910 | 513 | "XML 1.0 Illegal attr char"); |
911 | 513 | vep->state = VEP_TAGERROR; |
912 | 513 | } |
913 | 264k | } else if (vep->state == VEP_ATTRGETVAL) { |
914 | 14.4k | AZ(vep->attr_vsb); |
915 | 14.4k | vep->attr_vsb = VSB_new_auto(); |
916 | 14.4k | vep->state = VEP_ATTRDELIM; |
917 | 250k | } else if (vep->state == VEP_ATTRDELIM) { |
918 | 19.0k | AZ(vep->attr_delim); |
919 | 19.0k | if (*p == '"' || *p == '\'') { |
920 | 996 | vep->attr_delim = *p++; |
921 | 996 | vep->state = VEP_ATTRVAL; |
922 | 18.0k | } else if (!vct_issp(*p)) { |
923 | 15.9k | vep->attr_delim = ' '; |
924 | 15.9k | vep->state = VEP_ATTRVAL; |
925 | 15.9k | } else { |
926 | 2.13k | vep_error(vep, |
927 | 2.13k | "XML 1.0 Illegal attribute delimiter"); |
928 | 2.13k | vep->state = VEP_TAGERROR; |
929 | 2.13k | } |
930 | | |
931 | 231k | } else if (vep->state == VEP_ATTRVAL) { |
932 | 17.8M | while (p < e && *p != '>' && *p != vep->attr_delim && |
933 | 17.8M | (vep->attr_delim != ' ' || !vct_issp(*p))) { |
934 | 17.8M | if (vep->attr_vsb != NULL) |
935 | 17.7M | VSB_putc(vep->attr_vsb, *p); |
936 | 17.8M | p++; |
937 | 17.8M | } |
938 | 16.9k | if (p < e && *p == '>') { |
939 | 1.00k | vep_error(vep, |
940 | 1.00k | "XML 1.0 Missing end attribute delimiter"); |
941 | 1.00k | vep->state = VEP_TAGERROR; |
942 | 1.00k | vep->attr_delim = 0; |
943 | 1.00k | if (vep->attr_vsb != NULL) { |
944 | 638 | AZ(VSB_finish(vep->attr_vsb)); |
945 | 638 | VSB_destroy(&vep->attr_vsb); |
946 | 638 | } |
947 | 15.9k | } else if (p < e) { |
948 | 15.8k | vep->attr_delim = 0; |
949 | 15.8k | p++; |
950 | 15.8k | vep->state = VEP_INTAG; |
951 | 15.8k | if (vep->attr_vsb != NULL) { |
952 | 11.8k | AZ(VSB_finish(vep->attr_vsb)); |
953 | 11.8k | AN(vep->dostuff); |
954 | 11.8k | vep->dostuff(vep, DO_ATTR); |
955 | 11.8k | vep->attr_vsb = NULL; |
956 | 11.8k | } |
957 | 15.8k | } |
958 | | |
959 | | /****************************************************** |
960 | | * Utility Section |
961 | | */ |
962 | | |
963 | 214k | } else if (vep->state == VEP_MATCH) { |
964 | | /* |
965 | | * Match against a table |
966 | | */ |
967 | 208k | vm = vep_match(vep, p, e); |
968 | 208k | vep->match_hit = vm; |
969 | 208k | if (vm != NULL) { |
970 | 208k | if (vm->match != NULL) |
971 | 166k | p += vstrlen(vm->match); |
972 | 208k | vep->state = *vm->state; |
973 | 208k | vep->match = NULL; |
974 | 208k | vep->tag_i = 0; |
975 | 208k | } else { |
976 | 38 | assert(p + sizeof(vep->tag) >= e); |
977 | 38 | memcpy(vep->tag, p, e - p); |
978 | 38 | vep->tag_i = e - p; |
979 | 38 | vep->state = VEP_MATCHBUF; |
980 | 38 | p = e; |
981 | 38 | } |
982 | 208k | } else if (vep->state == VEP_MATCHBUF) { |
983 | | /* |
984 | | * Match against a table while split over input |
985 | | * sections. |
986 | | */ |
987 | 0 | AN(vep->match); |
988 | 0 | i = sizeof(vep->tag) - vep->tag_i; |
989 | 0 | if (i > e - p) |
990 | 0 | i = e - p; |
991 | 0 | memcpy(vep->tag + vep->tag_i, p, i); |
992 | 0 | vm = vep_match(vep, vep->tag, |
993 | 0 | vep->tag + vep->tag_i + i); |
994 | 0 | Debug("MB (%.*s) tag_i %d i %d = vm %p match %s\n", |
995 | 0 | vep->tag_i + i, vep->tag, |
996 | 0 | vep->tag_i, |
997 | 0 | i, |
998 | 0 | vm, |
999 | 0 | vm ? vm->match : "(nil)"); |
1000 | |
|
1001 | 0 | if (vm == NULL) { |
1002 | 0 | vep->tag_i += i; |
1003 | 0 | p += i; |
1004 | 0 | assert(p == e); |
1005 | 0 | } else { |
1006 | 0 | vep->match_hit = vm; |
1007 | 0 | vep->state = *vm->state; |
1008 | 0 | if (vm->match != NULL) { |
1009 | 0 | i = vstrlen(vm->match); |
1010 | 0 | if (i > vep->tag_i) |
1011 | 0 | p += i - vep->tag_i; |
1012 | 0 | } |
1013 | 0 | vep->match = NULL; |
1014 | 0 | vep->tag_i = 0; |
1015 | 0 | } |
1016 | 5.91k | } else if (vep->state == VEP_UNTIL) { |
1017 | | /* |
1018 | | * Skip until we see magic string |
1019 | | */ |
1020 | 14.1M | while (p < e) { |
1021 | 14.1M | if (*p++ != *vep->until_p++) { |
1022 | 14.0M | vep->until_p = vep->until; |
1023 | 14.0M | } else if (*vep->until_p == '\0') { |
1024 | 5.84k | vep->state = vep->until_s; |
1025 | 5.84k | break; |
1026 | 5.84k | } |
1027 | 14.1M | } |
1028 | 5.91k | if (p == e && !vep->remove) |
1029 | 72 | vep_mark_verbatim(vep, p); |
1030 | 5.91k | } else { |
1031 | 0 | Debug("*** Unknown state %s\n", vep->state); |
1032 | 0 | WRONG("WRONG ESI PARSER STATE"); |
1033 | 0 | } |
1034 | 862k | } |
1035 | | /* |
1036 | | * We must always mark up the storage we got, try to do so |
1037 | | * in the most efficient way, in particular with respect to |
1038 | | * minimizing and limiting use of pending. |
1039 | | */ |
1040 | 1.61k | if (p == vep->ver_p) |
1041 | 1.00k | ; |
1042 | 615 | else if (vep->in_esi_tag) |
1043 | 360 | vep_mark_skip(vep, p); |
1044 | 255 | else if (vep->remove) |
1045 | 35 | vep_mark_skip(vep, p); |
1046 | 220 | else |
1047 | 220 | vep_mark_pending(vep, p); |
1048 | 1.61k | } |
1049 | | |
1050 | | /*--------------------------------------------------------------------- |
1051 | | */ |
1052 | | |
1053 | | static ssize_t v_matchproto_(vep_callback_t) |
1054 | | vep_default_cb(struct vfp_ctx *vc, void *priv, ssize_t l, enum vgz_flag flg) |
1055 | 777k | { |
1056 | 777k | ssize_t *s; |
1057 | | |
1058 | 777k | CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC); |
1059 | 777k | AN(priv); |
1060 | 777k | s = priv; |
1061 | 777k | *s += l; |
1062 | 777k | (void)flg; |
1063 | 777k | return (*s); |
1064 | 777k | } |
1065 | | |
1066 | | /*--------------------------------------------------------------------- |
1067 | | */ |
1068 | | |
1069 | | struct vep_state * |
1070 | | VEP_Init(struct vfp_ctx *vc, const struct http *req, vep_callback_t *cb, |
1071 | | void *cb_priv, struct vep_flags flags) |
1072 | 1.61k | { |
1073 | 1.61k | struct vep_state *vep; |
1074 | | |
1075 | 1.61k | CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC); |
1076 | 1.61k | CHECK_OBJ_NOTNULL(req, HTTP_MAGIC); |
1077 | 1.61k | vep = WS_Alloc(vc->resp->ws, sizeof *vep); |
1078 | 1.61k | if (vep == NULL) { |
1079 | 0 | VSLb(vc->wrk->vsl, SLT_VCL_Error, |
1080 | 0 | "VEP_Init() workspace overflow"); |
1081 | 0 | return (NULL); |
1082 | 0 | } |
1083 | | |
1084 | 1.61k | INIT_OBJ(vep, VEP_MAGIC); |
1085 | 1.61k | vep->flags = flags; |
1086 | 1.61k | vep->url = req->hd[HTTP_HDR_URL].b; |
1087 | 1.61k | vep->vc = vc; |
1088 | 1.61k | vep->vsb = VSB_new_auto(); |
1089 | 1.61k | AN(vep->vsb); |
1090 | | |
1091 | 1.61k | if (cb != NULL) { |
1092 | 0 | vep->dogzip = 1; |
1093 | | /* XXX */ |
1094 | 0 | VSB_printf(vep->vsb, "%c", VEC_GZ); |
1095 | 0 | vep->cb = cb; |
1096 | 0 | vep->cb_priv = cb_priv; |
1097 | 1.61k | } else { |
1098 | 1.61k | vep->cb = vep_default_cb; |
1099 | 1.61k | vep->cb_priv = &vep->cb_x; |
1100 | 1.61k | } |
1101 | | |
1102 | 1.61k | vep->state = VEP_START; |
1103 | 1.61k | vep->crc = crc32(0L, Z_NULL, 0); |
1104 | 1.61k | vep->crcp = crc32(0L, Z_NULL, 0); |
1105 | | |
1106 | 1.61k | vep->startup = 1; |
1107 | 1.61k | return (vep); |
1108 | 1.61k | } |
1109 | | |
1110 | | /*--------------------------------------------------------------------- |
1111 | | */ |
1112 | | |
1113 | | struct vsb * |
1114 | | VEP_Finish(struct vep_state *vep) |
1115 | 1.61k | { |
1116 | 1.61k | ssize_t l, lcb; |
1117 | | |
1118 | 1.61k | CHECK_OBJ_NOTNULL(vep, VEP_MAGIC); |
1119 | | |
1120 | 1.61k | if (vep->include_src) |
1121 | 46 | VSB_destroy(&vep->include_src); |
1122 | 1.61k | if (vep->attr_vsb) |
1123 | 44 | VSB_destroy(&vep->attr_vsb); |
1124 | | |
1125 | 1.61k | if (vep->state != VEP_START && |
1126 | 1.60k | vep->state != VEP_BOM && |
1127 | 1.60k | vep->state != VEP_TESTXML && |
1128 | 1.59k | vep->state != VEP_NOTXML && |
1129 | 1.52k | vep->state != VEP_NEXTTAG) { |
1130 | 598 | vep_error(vep, "VEP ended inside a tag"); |
1131 | 598 | } |
1132 | | |
1133 | 1.61k | if (vep->o_pending) |
1134 | 220 | vep_mark_common(vep, vep->ver_p, vep->last_mark); |
1135 | 1.61k | if (vep->o_wait > 0) { |
1136 | 1.58k | lcb = vep->cb(vep->vc, vep->cb_priv, 0, VGZ_ALIGN); |
1137 | 1.58k | vep_emit_common(vep, lcb - vep->o_last, vep->last_mark); |
1138 | 1.58k | } |
1139 | | // NB: We don't account for PAD+SUM+LEN in gzipped objects |
1140 | 1.61k | (void)vep->cb(vep->vc, vep->cb_priv, 0, VGZ_FINISH); |
1141 | | |
1142 | 1.61k | AZ(VSB_finish(vep->vsb)); |
1143 | 1.61k | l = VSB_len(vep->vsb); |
1144 | 1.61k | if (vep->esi_found && l > 0) |
1145 | 1.12k | return (vep->vsb); |
1146 | 498 | VSB_destroy(&vep->vsb); |
1147 | | return (NULL); |
1148 | 1.61k | } |
1149 | | |
1150 | | #if 0 |
1151 | | |
1152 | | digraph xml { |
1153 | | rankdir="LR" |
1154 | | size="7,10" |
1155 | | ################################################################# |
1156 | | # SECTION A |
1157 | | # |
1158 | | |
1159 | | START [shape=ellipse] |
1160 | | TESTXML [shape=ellipse] |
1161 | | NOTXML [shape=ellipse] |
1162 | | NEXTTAGa [shape=hexagon, label="NEXTTAG"] |
1163 | | STARTTAGa [shape=hexagon, label="STARTTAG"] |
1164 | | START -> TESTXML |
1165 | | START -> NEXTTAGa [style=dotted, label="syntax:1"] |
1166 | | TESTXML -> TESTXML [label="lws"] |
1167 | | TESTXML -> NOTXML |
1168 | | TESTXML -> STARTTAGa [label="'<'"] |
1169 | | |
1170 | | ################################################################# |
1171 | | # SECTION B |
1172 | | |
1173 | | NOTMYTAG [shape=ellipse] |
1174 | | NEXTTAG [shape=ellipse] |
1175 | | NOTMYTAG -> NEXTTAG [style=dotted, label="syntax:2"] |
1176 | | STARTTAGb [shape=hexagon, label="STARTTAG"] |
1177 | | NOTMYTAG -> NEXTTAG [label="'>'"] |
1178 | | NOTMYTAG -> NOTMYTAG [label="*"] |
1179 | | NEXTTAG -> NEXTTAG [label="'-->'"] |
1180 | | NEXTTAG -> NEXTTAG [label="*"] |
1181 | | NEXTTAG -> STARTTAGb [label="'<'"] |
1182 | | |
1183 | | ################################################################# |
1184 | | # SECTION C |
1185 | | |
1186 | | STARTTAG [shape=ellipse] |
1187 | | COMMENT [shape=ellipse] |
1188 | | CDATA [shape=ellipse] |
1189 | | ESITAG [shape=ellipse] |
1190 | | ESIETAG [shape=ellipse] |
1191 | | ESIINCLUDE [shape=ellipse] |
1192 | | ESIREMOVE [shape=ellipse] |
1193 | | ESICOMMENT [shape=ellipse] |
1194 | | ESIBOGON [shape=ellipse] |
1195 | | INTAGc [shape=hexagon, label="INTAG"] |
1196 | | NOTMYTAGc [shape=hexagon, label="NOTMYTAG"] |
1197 | | NEXTTAGc [shape=hexagon, label="NEXTTAG"] |
1198 | | TAGERRORc [shape=hexagon, label="TAGERROR"] |
1199 | | C1 [shape=circle,label=""] |
1200 | | STARTTAG -> COMMENT [label="'<!--'"] |
1201 | | STARTTAG -> ESITAG [label="'<esi'"] |
1202 | | STARTTAG -> CDATA [label="'<![CDATA['"] |
1203 | | STARTTAG -> NOTMYTAGc [label="'*'"] |
1204 | | COMMENT -> NEXTTAGc [label="'esi'"] |
1205 | | COMMENT -> C1 [label="*"] |
1206 | | C1 -> C1 [label="*"] |
1207 | | C1 -> NEXTTAGc [label="-->"] |
1208 | | CDATA -> CDATA [label="*"] |
1209 | | CDATA -> NEXTTAGc [label="]]>"] |
1210 | | ESITAG -> ESIINCLUDE [label="'include'"] |
1211 | | ESITAG -> ESIREMOVE [label="'remove'"] |
1212 | | ESITAG -> ESICOMMENT [label="'comment'"] |
1213 | | ESITAG -> ESIBOGON [label="*"] |
1214 | | ESICOMMENT -> INTAGc |
1215 | | ESICOMMENT -> TAGERRORc |
1216 | | ESICOMMENT -> TAGERRORc [style=dotted, label="nested\nin\nremove"] |
1217 | | ESIREMOVE -> INTAGc |
1218 | | ESIREMOVE -> TAGERRORc |
1219 | | ESIINCLUDE -> INTAGc |
1220 | | ESIINCLUDE -> TAGERRORc |
1221 | | ESIINCLUDE -> TAGERRORc [style=dotted, label="nested\nin\nremove"] |
1222 | | ESIBOGON -> TAGERRORc |
1223 | | |
1224 | | ################################################################# |
1225 | | # SECTION D |
1226 | | |
1227 | | INTAG [shape=ellipse] |
1228 | | TAGERROR [shape=ellipse] |
1229 | | NEXTTAGd [shape=hexagon, label="NEXTTAG"] |
1230 | | ATTRd [shape=hexagon, label="ATTR"] |
1231 | | D1 [shape=circle, label=""] |
1232 | | D2 [shape=circle, label=""] |
1233 | | INTAG -> D1 [label="lws"] |
1234 | | D1 -> D2 [label="/"] |
1235 | | INTAG -> D2 [label="/"] |
1236 | | INTAG -> NEXTTAGd [label=">"] |
1237 | | D1 -> NEXTTAGd [label=">"] |
1238 | | D2 -> NEXTTAGd [label=">"] |
1239 | | D1 -> ATTRd [label="XMLstartchar"] |
1240 | | D1 -> TAGERROR [label="*"] |
1241 | | D2 -> TAGERROR [label="*"] |
1242 | | TAGERROR -> TAGERROR [label="*"] |
1243 | | TAGERROR -> NEXTTAGd [label="'>'"] |
1244 | | |
1245 | | ################################################################# |
1246 | | # SECTION E |
1247 | | |
1248 | | ATTR [shape=ellipse] |
1249 | | SKIPATTR [shape=ellipse] |
1250 | | ATTRGETVAL [shape=ellipse] |
1251 | | ATTRDELIM [shape=ellipse] |
1252 | | ATTRVAL [shape=ellipse] |
1253 | | TAGERRORe [shape=hexagon, label="TAGERROR"] |
1254 | | INTAGe [shape=hexagon, label="INTAG"] |
1255 | | ATTR -> SKIPATTR [label="*"] |
1256 | | ATTR -> ATTRGETVAL [label="wanted attr"] |
1257 | | SKIPATTR -> SKIPATTR [label="XMLname"] |
1258 | | SKIPATTR -> ATTRDELIM [label="'='"] |
1259 | | SKIPATTR -> TAGERRORe [label="*"] |
1260 | | ATTRGETVAL -> ATTRDELIM |
1261 | | ATTRDELIM -> ATTRVAL [label="\""] |
1262 | | ATTRDELIM -> ATTRVAL [label="\'"] |
1263 | | ATTRDELIM -> ATTRVAL [label="*"] |
1264 | | ATTRDELIM -> TAGERRORe [label="lws"] |
1265 | | ATTRVAL -> TAGERRORe [label="'>'"] |
1266 | | ATTRVAL -> INTAGe [label="delim"] |
1267 | | ATTRVAL -> ATTRVAL [label="*"] |
1268 | | |
1269 | | } |
1270 | | |
1271 | | #endif |