/src/tidy-html5/src/istack.c
Line | Count | Source |
1 | | /* istack.c -- inline stack for compatibility with Mosaic |
2 | | |
3 | | (c) 1998-2006 (W3C) MIT, ERCIM, Keio University |
4 | | See tidy.h for the copyright notice. |
5 | | |
6 | | */ |
7 | | |
8 | | #include "tidy-int.h" |
9 | | #include "lexer.h" |
10 | | #include "attrs.h" |
11 | | #include "streamio.h" |
12 | | #include "tmbstr.h" |
13 | | |
14 | | /* duplicate attributes */ |
15 | | AttVal *TY_(DupAttrs)( TidyDocImpl* doc, AttVal *attrs) |
16 | 557k | { |
17 | 557k | AttVal *newattrs; |
18 | | |
19 | 557k | if (attrs == NULL) |
20 | 533k | return attrs; |
21 | | |
22 | 24.2k | newattrs = TY_(NewAttribute)(doc); |
23 | 24.2k | *newattrs = *attrs; |
24 | 24.2k | newattrs->next = TY_(DupAttrs)( doc, attrs->next ); |
25 | 24.2k | newattrs->attribute = TY_(tmbstrdup)(doc->allocator, attrs->attribute); |
26 | 24.2k | newattrs->value = TY_(tmbstrdup)(doc->allocator, attrs->value); |
27 | 24.2k | newattrs->dict = TY_(FindAttribute)(doc, newattrs); |
28 | 24.2k | newattrs->asp = attrs->asp ? TY_(CloneNode)(doc, attrs->asp) : NULL; |
29 | 24.2k | newattrs->php = attrs->php ? TY_(CloneNode)(doc, attrs->php) : NULL; |
30 | 24.2k | return newattrs; |
31 | 557k | } |
32 | | |
33 | | static Bool IsNodePushable( Node *node ) |
34 | 227k | { |
35 | 227k | if (node->tag == NULL) |
36 | 0 | return no; |
37 | | |
38 | 227k | if (!(node->tag->model & CM_INLINE)) |
39 | 242 | return no; |
40 | | |
41 | 227k | if (node->tag->model & CM_OBJECT) |
42 | 151 | return no; |
43 | | |
44 | | /*\ Issue #92: OLD problem of ins and del which are marked as both |
45 | | * inline and block, thus should NOT ever be 'inserted' |
46 | | \*/ |
47 | 227k | if (nodeIsINS(node) || nodeIsDEL(node)) |
48 | 2.08k | return no; |
49 | | |
50 | 225k | return yes; |
51 | 227k | } |
52 | | |
53 | | /* |
54 | | push a copy of an inline node onto stack |
55 | | but don't push if implicit or OBJECT or APPLET |
56 | | (implicit tags are ones generated from the istack) |
57 | | |
58 | | One issue arises with pushing inlines when |
59 | | the tag is already pushed. For instance: |
60 | | |
61 | | <p><em>text |
62 | | <p><em>more text |
63 | | |
64 | | Shouldn't be mapped to |
65 | | |
66 | | <p><em>text</em></p> |
67 | | <p><em><em>more text</em></em> |
68 | | */ |
69 | | void TY_(PushInline)( TidyDocImpl* doc, Node *node ) |
70 | 541k | { |
71 | 541k | Lexer* lexer = doc->lexer; |
72 | 541k | IStack *istack; |
73 | | |
74 | 541k | if (node->implicit) |
75 | 336k | return; |
76 | | |
77 | 204k | if ( !IsNodePushable(node) ) |
78 | 1.15k | return; |
79 | | |
80 | 203k | if ( !nodeIsFONT(node) && TY_(IsPushed)(doc, node) ) |
81 | 12.3k | return; |
82 | | |
83 | | /* make sure there is enough space for the stack */ |
84 | 191k | if (lexer->istacksize + 1 > lexer->istacklength) |
85 | 583 | { |
86 | 583 | if (lexer->istacklength == 0) |
87 | 251 | lexer->istacklength = 6; /* this is perhaps excessive */ |
88 | | |
89 | 583 | lexer->istacklength = lexer->istacklength * 2; |
90 | 583 | lexer->istack = (IStack *)TidyDocRealloc(doc, lexer->istack, |
91 | 583 | sizeof(IStack)*(lexer->istacklength)); |
92 | 583 | } |
93 | | |
94 | 191k | istack = &(lexer->istack[lexer->istacksize]); |
95 | 191k | istack->tag = node->tag; |
96 | | |
97 | 191k | istack->element = TY_(tmbstrdup)(doc->allocator, node->element); |
98 | 191k | istack->attributes = TY_(DupAttrs)( doc, node->attributes ); |
99 | 191k | ++(lexer->istacksize); |
100 | 191k | } |
101 | | |
102 | | static void PopIStack( TidyDocImpl* doc ) |
103 | 191k | { |
104 | 191k | Lexer* lexer = doc->lexer; |
105 | 191k | IStack *istack; |
106 | 191k | AttVal *av; |
107 | | |
108 | 191k | --(lexer->istacksize); |
109 | 191k | istack = &(lexer->istack[lexer->istacksize]); |
110 | | |
111 | 195k | while (istack->attributes) |
112 | 3.64k | { |
113 | 3.64k | av = istack->attributes; |
114 | 3.64k | istack->attributes = av->next; |
115 | 3.64k | TY_(FreeAttribute)( doc, av ); |
116 | 3.64k | } |
117 | 191k | TidyDocFree(doc, istack->element); |
118 | 191k | istack->element = NULL; /* remove the freed element */ |
119 | 191k | } |
120 | | |
121 | | static void PopIStackUntil( TidyDocImpl* doc, TidyTagId tid ) |
122 | 7.36k | { |
123 | 7.36k | Lexer* lexer = doc->lexer; |
124 | 7.36k | IStack *istack; |
125 | | |
126 | 35.7k | while (lexer->istacksize > 0) |
127 | 34.6k | { |
128 | 34.6k | PopIStack( doc ); |
129 | 34.6k | istack = &(lexer->istack[lexer->istacksize]); |
130 | 34.6k | if ( istack->tag->id == tid ) |
131 | 6.26k | break; |
132 | 34.6k | } |
133 | 7.36k | } |
134 | | |
135 | | /* pop inline stack */ |
136 | | void TY_(PopInline)( TidyDocImpl* doc, Node *node ) |
137 | 166k | { |
138 | 166k | Lexer* lexer = doc->lexer; |
139 | | |
140 | 166k | if (node) |
141 | 19.0k | { |
142 | 19.0k | if ( !IsNodePushable(node) ) |
143 | 1.07k | return; |
144 | | |
145 | | /* if node is </a> then pop until we find an <a> */ |
146 | 17.9k | if ( nodeIsA(node) ) |
147 | 7.36k | { |
148 | 7.36k | PopIStackUntil( doc, TidyTag_A ); |
149 | 7.36k | return; |
150 | 7.36k | } |
151 | 17.9k | } |
152 | | |
153 | 157k | if (lexer->istacksize > 0) |
154 | 156k | { |
155 | 156k | PopIStack( doc ); |
156 | | |
157 | | /* #427822 - fix by Randy Waki 7 Aug 00 */ |
158 | 156k | if (lexer->insert >= lexer->istack + lexer->istacksize) |
159 | 128 | lexer->insert = NULL; |
160 | 156k | } |
161 | 157k | } |
162 | | |
163 | | Bool TY_(IsPushed)( TidyDocImpl* doc, Node *node ) |
164 | 1.28M | { |
165 | 1.28M | Lexer* lexer = doc->lexer; |
166 | 1.28M | int i; |
167 | | |
168 | 369M | for (i = lexer->istacksize - 1; i >= 0; --i) |
169 | 368M | { |
170 | 368M | if (lexer->istack[i].tag == node->tag) |
171 | 1.05M | return yes; |
172 | 368M | } |
173 | | |
174 | 230k | return no; |
175 | 1.28M | } |
176 | | |
177 | | /* |
178 | | Test whether the last element on the stack has the same type than "node". |
179 | | */ |
180 | | Bool TY_(IsPushedLast)( TidyDocImpl* doc, Node *element, Node *node ) |
181 | 3.73k | { |
182 | 3.73k | Lexer* lexer = doc->lexer; |
183 | | |
184 | 3.73k | if ( element && !IsNodePushable(element) ) |
185 | 242 | return no; |
186 | | |
187 | 3.49k | if (lexer->istacksize > 0) { |
188 | 3.35k | if (lexer->istack[lexer->istacksize - 1].tag == node->tag) { |
189 | 0 | return yes; |
190 | 0 | } |
191 | 3.35k | } |
192 | | |
193 | 3.49k | return no; |
194 | 3.49k | } |
195 | | |
196 | | /* |
197 | | This has the effect of inserting "missing" inline |
198 | | elements around the contents of blocklevel elements |
199 | | such as P, TD, TH, DIV, PRE etc. This procedure is |
200 | | called at the start of ParseBlock. when the inline |
201 | | stack is not empty, as will be the case in: |
202 | | |
203 | | <i><h1>italic heading</h1></i> |
204 | | |
205 | | which is then treated as equivalent to |
206 | | |
207 | | <h1><i>italic heading</i></h1> |
208 | | |
209 | | This is implemented by setting the lexer into a mode |
210 | | where it gets tokens from the inline stack rather than |
211 | | from the input stream. |
212 | | */ |
213 | | int TY_(InlineDup)( TidyDocImpl* doc, Node* node ) |
214 | 157k | { |
215 | 157k | Lexer* lexer = doc->lexer; |
216 | 157k | int n; |
217 | | |
218 | 157k | if ((n = lexer->istacksize - lexer->istackbase) > 0) |
219 | 20.2k | { |
220 | 20.2k | lexer->insert = &(lexer->istack[lexer->istackbase]); |
221 | 20.2k | lexer->inode = node; |
222 | 20.2k | } |
223 | | |
224 | 157k | return n; |
225 | 157k | } |
226 | | |
227 | | /* |
228 | | defer duplicates when entering a table or other |
229 | | element where the inlines shouldn't be duplicated |
230 | | */ |
231 | | void TY_(DeferDup)( TidyDocImpl* doc ) |
232 | 26.5k | { |
233 | 26.5k | doc->lexer->insert = NULL; |
234 | 26.5k | doc->lexer->inode = NULL; |
235 | 26.5k | } |
236 | | |
237 | | Node *TY_(InsertedToken)( TidyDocImpl* doc ) |
238 | 341k | { |
239 | 341k | Lexer* lexer = doc->lexer; |
240 | 341k | Node *node; |
241 | 341k | IStack *istack; |
242 | 341k | uint n; |
243 | | |
244 | | /* this will only be NULL if inode != NULL */ |
245 | 341k | if (lexer->insert == NULL) |
246 | 1.49k | { |
247 | 1.49k | node = lexer->inode; |
248 | 1.49k | lexer->inode = NULL; |
249 | 1.49k | return node; |
250 | 1.49k | } |
251 | | |
252 | | /* |
253 | | If this is the "latest" node then update |
254 | | the position, otherwise use current values |
255 | | */ |
256 | | |
257 | 339k | if (lexer->inode == NULL) |
258 | 333k | { |
259 | 333k | lexer->lines = doc->docIn->curline; |
260 | 333k | lexer->columns = doc->docIn->curcol; |
261 | 333k | } |
262 | | |
263 | 339k | node = TY_(NewNode)(doc->allocator, lexer); |
264 | 339k | node->type = StartTag; |
265 | 339k | node->implicit = yes; |
266 | 339k | node->start = lexer->txtstart; |
267 | | /* #431734 [JTidy bug #226261 (was 126261)] - fix by Gary Peskin 20 Dec 00 */ |
268 | 339k | node->end = lexer->txtend; /* was : lexer->txtstart; */ |
269 | 339k | istack = lexer->insert; |
270 | | |
271 | | /* #if 0 && defined(_DEBUG) */ |
272 | | #if definedENABLE_DEBUG_LOG |
273 | | if ( lexer->istacksize == 0 ) |
274 | | { |
275 | | SPRTF( "WARNING: ZERO sized istack!\n" ); |
276 | | } |
277 | | #endif |
278 | | |
279 | 339k | node->element = TY_(tmbstrdup)(doc->allocator, istack->element); |
280 | 339k | node->tag = istack->tag; |
281 | 339k | node->attributes = TY_(DupAttrs)( doc, istack->attributes ); |
282 | | |
283 | | /* advance lexer to next item on the stack */ |
284 | 339k | n = (uint)(lexer->insert - &(lexer->istack[0])); |
285 | | |
286 | | /* and recover state if we have reached the end */ |
287 | 339k | if (++n < lexer->istacksize) |
288 | 316k | lexer->insert = &(lexer->istack[n]); |
289 | 22.7k | else |
290 | 22.7k | lexer->insert = NULL; |
291 | | |
292 | 339k | return node; |
293 | 341k | } |
294 | | |
295 | | |
296 | | /* |
297 | | We have two CM_INLINE elements pushed ... the first is closing, |
298 | | but, like the browser, the second should be retained ... |
299 | | Like <b>bold <i>bold and italics</b> italics only</i> |
300 | | This function switches the tag positions on the stack, |
301 | | returning 'yes' if both were found in the expected order. |
302 | | */ |
303 | | Bool TY_(SwitchInline)( TidyDocImpl* doc, Node* element, Node* node ) |
304 | 6.74k | { |
305 | 6.74k | Lexer* lexer = doc->lexer; |
306 | 6.74k | if ( lexer |
307 | 6.74k | && element && element->tag |
308 | 6.74k | && node && node->tag |
309 | 6.74k | && TY_(IsPushed)( doc, element ) |
310 | 6.74k | && TY_(IsPushed)( doc, node ) |
311 | 6.74k | && ((lexer->istacksize - lexer->istackbase) >= 2) ) |
312 | 6.12k | { |
313 | | /* we have a chance of succeeding ... */ |
314 | 6.12k | int i; |
315 | 11.1k | for (i = (lexer->istacksize - lexer->istackbase - 1); i >= 0; --i) |
316 | 8.92k | { |
317 | 8.92k | if (lexer->istack[i].tag == element->tag) { |
318 | | /* found the element tag - phew */ |
319 | 6.01k | IStack *istack1 = &lexer->istack[i]; |
320 | 6.01k | IStack *istack2 = NULL; |
321 | 6.01k | --i; /* back one more, and continue */ |
322 | 49.1M | for ( ; i >= 0; --i) |
323 | 49.1M | { |
324 | 49.1M | if (lexer->istack[i].tag == node->tag) |
325 | 3.85k | { |
326 | | /* found the element tag - phew */ |
327 | 3.85k | istack2 = &lexer->istack[i]; |
328 | 3.85k | break; |
329 | 3.85k | } |
330 | 49.1M | } |
331 | 6.01k | if ( istack2 ) |
332 | 3.85k | { |
333 | | /* perform the swap */ |
334 | 3.85k | IStack tmp_istack = *istack2; |
335 | 3.85k | *istack2 = *istack1; |
336 | 3.85k | *istack1 = tmp_istack; |
337 | 3.85k | return yes; |
338 | 3.85k | } |
339 | 6.01k | } |
340 | 8.92k | } |
341 | 6.12k | } |
342 | 2.88k | return no; |
343 | 6.74k | } |
344 | | |
345 | | /* |
346 | | We want to push a specific a specific element on the stack, |
347 | | but it may not be the last element, which InlineDup() |
348 | | would handle. Return yes, if found and inserted. |
349 | | */ |
350 | | Bool TY_(InlineDup1)( TidyDocImpl* doc, Node* node, Node* element ) |
351 | 3.85k | { |
352 | 3.85k | Lexer* lexer = doc->lexer; |
353 | 3.85k | int n, i; |
354 | 3.85k | if ( element |
355 | 3.85k | && (element->tag != NULL) |
356 | 3.85k | && ((n = lexer->istacksize - lexer->istackbase) > 0) ) |
357 | 3.85k | { |
358 | 9.12k | for ( i = n - 1; i >=0; --i ) { |
359 | 9.12k | if (lexer->istack[i].tag == element->tag) { |
360 | | /* found our element tag - insert it */ |
361 | 3.85k | lexer->insert = &(lexer->istack[i]); |
362 | 3.85k | lexer->inode = node; |
363 | 3.85k | return yes; |
364 | 3.85k | } |
365 | 9.12k | } |
366 | 3.85k | } |
367 | 0 | return no; |
368 | 3.85k | } |
369 | | |
370 | | /* |
371 | | * local variables: |
372 | | * mode: c |
373 | | * indent-tabs-mode: nil |
374 | | * c-basic-offset: 4 |
375 | | * eval: (c-set-offset 'substatement-open 0) |
376 | | * end: |
377 | | */ |