/src/tidy-html5/src/attrs.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* attrs.c -- recognize HTML attributes |
2 | | |
3 | | (c) 1998-2009 (W3C) MIT, ERCIM, Keio University |
4 | | See tidy.h for the copyright notice. |
5 | | |
6 | | */ |
7 | | |
8 | | #include "tidy-int.h" |
9 | | #include "attrs.h" |
10 | | #include "message.h" |
11 | | #include "tmbstr.h" |
12 | | #include "utf8.h" |
13 | | |
14 | | /* |
15 | | Bind attribute types to procedures to check values. |
16 | | You can add new procedures for better validation |
17 | | and each procedure has access to the node in which |
18 | | the attribute occurred as well as the attribute name |
19 | | and its value. |
20 | | |
21 | | By default, attributes are checked without regard |
22 | | to the element they are found on. You have the choice |
23 | | of making the procedure test which element is involved |
24 | | or of writing methods for each element which control |
25 | | exactly how the attributes of that element are checked. |
26 | | This latter approach is best for detecting the absence |
27 | | of required attributes. |
28 | | */ |
29 | | |
30 | | static AttrCheck CheckAction; |
31 | | static AttrCheck CheckScript; |
32 | | static AttrCheck CheckName; |
33 | | static AttrCheck CheckId; |
34 | | static AttrCheck CheckIs; |
35 | | static AttrCheck CheckAlign; |
36 | | static AttrCheck CheckValign; |
37 | | static AttrCheck CheckBool; |
38 | | static AttrCheck CheckLength; |
39 | | static AttrCheck CheckTarget; |
40 | | static AttrCheck CheckFsubmit; |
41 | | static AttrCheck CheckClear; |
42 | | static AttrCheck CheckShape; |
43 | | static AttrCheck CheckNumber; |
44 | | static AttrCheck CheckScope; |
45 | | static AttrCheck CheckColor; |
46 | | static AttrCheck CheckVType; |
47 | | static AttrCheck CheckScroll; |
48 | | static AttrCheck CheckTextDir; |
49 | | static AttrCheck CheckLang; |
50 | | static AttrCheck CheckLoading; |
51 | | static AttrCheck CheckType; |
52 | | static AttrCheck CheckRDFaSafeCURIE; |
53 | | static AttrCheck CheckRDFaTerm; |
54 | | static AttrCheck CheckRDFaPrefix; |
55 | | static AttrCheck CheckDecimal; |
56 | | static AttrCheck CheckSvgAttr; |
57 | | |
/*
   Attribute value categories (CH_*) and the checker bound to each.
   The attribute table refers to these categories rather than to the
   checker functions directly.
*/

/* Categories with no checker function bound (NULL). */
#define CH_PCDATA       NULL
#define CH_CHARSET      NULL
#define CH_XTYPE        NULL
#define CH_CHARACTER    NULL
#define CH_URLS         NULL
#define CH_COLS         NULL
#define CH_COORDS       NULL
#define CH_DATE         NULL
#define CH_IDREFS       NULL
#define CH_IDREF        NULL
#define CH_TFRAME       NULL
#define CH_FBORDER      NULL
#define CH_MEDIA        NULL
#define CH_LINKTYPES    NULL
#define CH_TRULES       NULL

/* Categories bound to a checker function. */
#define CH_TYPE         CheckType
#define CH_URL          TY_(CheckUrl)
#define CH_SCRIPT       CheckScript
#define CH_ALIGN        CheckAlign
#define CH_VALIGN       CheckValign
#define CH_COLOR        CheckColor
#define CH_CLEAR        CheckClear
#define CH_BORDER       CheckBool     /* kludge */
#define CH_LANG         CheckLang
#define CH_LOADING      CheckLoading
#define CH_BOOL         CheckBool
#define CH_NUMBER       CheckNumber
#define CH_LENGTH       CheckLength
#define CH_TEXTDIR      CheckTextDir
#define CH_IDDEF        CheckId
#define CH_ISDEF        CheckIs
#define CH_NAME         CheckName
#define CH_FSUBMIT      CheckFsubmit
#define CH_SCOPE        CheckScope
#define CH_SHAPE        CheckShape
#define CH_SCROLL       CheckScroll
#define CH_TARGET       CheckTarget
#define CH_VTYPE        CheckVType
#define CH_ACTION       CheckAction
#define CH_RDFAPREFIX   CheckRDFaPrefix
#define CH_RDFASCURIE   CheckRDFaSafeCURIE
#define CH_RDFASCURIES  CheckRDFaSafeCURIE
#define CH_RDFATERM     CheckRDFaTerm
#define CH_RDFATERMS    CheckRDFaTerm
#define CH_DECIMAL      CheckDecimal
#define CH_SVG          CheckSvgAttr
104 | | |
105 | | /* |
106 | | WARNING: This table /must/ be kept in the EXACT order of the TidyAttrId enum! |
107 | | When running the DEBUG version, this order is checked, in TY_(InitAttrs)(doc), |
108 | | and there is an assert() if any difference found. |
109 | | */ |
110 | | static const Attribute attribute_defs [] = |
111 | | { |
112 | | { TidyAttr_UNKNOWN, "unknown!", NULL }, |
113 | | { TidyAttr_ABBR, "abbr", CH_PCDATA }, |
114 | | { TidyAttr_ACCEPT, "accept", CH_XTYPE }, |
115 | | { TidyAttr_ACCEPT_CHARSET, "accept-charset", CH_CHARSET }, |
116 | | { TidyAttr_ACCESSKEY, "accesskey", CH_CHARACTER }, |
117 | | { TidyAttr_ACTION, "action", CH_ACTION }, |
118 | | { TidyAttr_ADD_DATE, "add_date", CH_PCDATA }, /* A */ |
119 | | { TidyAttr_ALIGN, "align", CH_ALIGN }, /* varies by element */ |
120 | | { TidyAttr_ALINK, "alink", CH_COLOR }, |
121 | | { TidyAttr_ALLOWFULLSCREEN, "allowfullscreen", CH_BOOL }, |
122 | | { TidyAttr_ALT, "alt", CH_PCDATA }, /* nowrap */ |
123 | | { TidyAttr_ARCHIVE, "archive", CH_URLS }, /* space or comma separated list */ |
124 | | { TidyAttr_AXIS, "axis", CH_PCDATA }, |
125 | | { TidyAttr_BACKGROUND, "background", CH_URL }, |
126 | | { TidyAttr_BGCOLOR, "bgcolor", CH_COLOR }, |
127 | | { TidyAttr_BGPROPERTIES, "bgproperties", CH_PCDATA }, /* BODY "fixed" fixes background */ |
128 | | { TidyAttr_BORDER, "border", CH_BORDER }, /* like LENGTH + "border" */ |
129 | | { TidyAttr_BORDERCOLOR, "bordercolor", CH_COLOR }, /* used on TABLE */ |
130 | | { TidyAttr_BOTTOMMARGIN, "bottommargin", CH_NUMBER }, /* used on BODY */ |
131 | | { TidyAttr_CELLPADDING, "cellpadding", CH_LENGTH }, /* % or pixel values */ |
132 | | { TidyAttr_CELLSPACING, "cellspacing", CH_LENGTH }, |
133 | | { TidyAttr_CHAR, "char", CH_CHARACTER }, |
134 | | { TidyAttr_CHAROFF, "charoff", CH_LENGTH }, |
135 | | { TidyAttr_CHARSET, "charset", CH_CHARSET }, |
136 | | { TidyAttr_CHECKED, "checked", CH_BOOL }, /* i.e. "checked" or absent */ |
137 | | { TidyAttr_CITE, "cite", CH_URL }, |
138 | | { TidyAttr_CLASS, "class", CH_PCDATA }, |
139 | | { TidyAttr_CLASSID, "classid", CH_URL }, |
140 | | { TidyAttr_CLEAR, "clear", CH_CLEAR }, /* BR: left, right, all */ |
141 | | { TidyAttr_CODE, "code", CH_PCDATA }, /* APPLET */ |
142 | | { TidyAttr_CODEBASE, "codebase", CH_URL }, /* OBJECT */ |
143 | | { TidyAttr_CODETYPE, "codetype", CH_XTYPE }, /* OBJECT */ |
144 | | { TidyAttr_COLOR, "color", CH_COLOR }, /* BASEFONT, FONT */ |
145 | | { TidyAttr_COLS, "cols", CH_COLS }, /* TABLE & FRAMESET */ |
146 | | { TidyAttr_COLSPAN, "colspan", CH_NUMBER }, |
147 | | { TidyAttr_COMPACT, "compact", CH_BOOL }, /* lists */ |
148 | | { TidyAttr_CONTENT, "content", CH_PCDATA }, |
149 | | { TidyAttr_COORDS, "coords", CH_COORDS }, /* AREA, A */ |
150 | | { TidyAttr_DATA, "data", CH_URL }, /* OBJECT */ |
151 | | { TidyAttr_DATAFLD, "datafld", CH_PCDATA }, /* used on DIV, IMG */ |
152 | | { TidyAttr_DATAFORMATAS, "dataformatas", CH_PCDATA }, /* used on DIV, IMG */ |
153 | | { TidyAttr_DATAPAGESIZE, "datapagesize", CH_NUMBER }, /* used on DIV, IMG */ |
154 | | { TidyAttr_DATASRC, "datasrc", CH_URL }, /* used on TABLE */ |
155 | | { TidyAttr_DATETIME, "datetime", CH_DATE }, /* INS, DEL */ |
156 | | { TidyAttr_DECLARE, "declare", CH_BOOL }, /* OBJECT */ |
157 | | { TidyAttr_DEFER, "defer", CH_BOOL }, /* SCRIPT */ |
158 | | { TidyAttr_DIR, "dir", CH_TEXTDIR }, /* ltr, rtl or auto */ |
159 | | { TidyAttr_DISABLED, "disabled", CH_BOOL }, /* form fields */ |
160 | | { TidyAttr_DOWNLOAD, "download", CH_PCDATA }, /* anchor */ |
161 | | { TidyAttr_ENCODING, "encoding", CH_PCDATA }, /* <?xml?> */ |
162 | | { TidyAttr_ENCTYPE, "enctype", CH_XTYPE }, /* FORM */ |
163 | | { TidyAttr_FACE, "face", CH_PCDATA }, /* BASEFONT, FONT */ |
164 | | { TidyAttr_FOR, "for", CH_IDREF }, /* LABEL */ |
165 | | { TidyAttr_FRAME, "frame", CH_TFRAME }, /* TABLE */ |
166 | | { TidyAttr_FRAMEBORDER, "frameborder", CH_FBORDER }, /* 0 or 1 */ |
167 | | { TidyAttr_FRAMESPACING, "framespacing", CH_NUMBER }, |
168 | | { TidyAttr_GRIDX, "gridx", CH_NUMBER }, /* TABLE Adobe golive*/ |
169 | | { TidyAttr_GRIDY, "gridy", CH_NUMBER }, /* TABLE Adobe golive */ |
170 | | { TidyAttr_HEADERS, "headers", CH_IDREFS }, /* table cells */ |
171 | | { TidyAttr_HEIGHT, "height", CH_LENGTH }, /* pixels only for TH/TD */ |
172 | | { TidyAttr_HREF, "href", CH_URL }, /* A, AREA, LINK and BASE */ |
173 | | { TidyAttr_HREFLANG, "hreflang", CH_LANG }, /* A, LINK */ |
174 | | { TidyAttr_HSPACE, "hspace", CH_NUMBER }, /* APPLET, IMG, OBJECT */ |
175 | | { TidyAttr_HTTP_EQUIV, "http-equiv", CH_PCDATA }, /* META */ |
176 | | { TidyAttr_ID, "id", CH_IDDEF }, |
177 | | { TidyAttr_IS, "is", CH_ISDEF }, |
178 | | { TidyAttr_ISMAP, "ismap", CH_BOOL }, /* IMG */ |
179 | | { TidyAttr_ITEMID, "itemid", CH_PCDATA }, |
180 | | { TidyAttr_ITEMPROP, "itemprop", CH_PCDATA }, |
181 | | { TidyAttr_ITEMREF, "itemref", CH_PCDATA }, |
182 | | { TidyAttr_ITEMSCOPE, "itemscope", CH_BOOL }, |
183 | | { TidyAttr_ITEMTYPE, "itemtype", CH_URL }, |
184 | | { TidyAttr_LABEL, "label", CH_PCDATA }, /* OPT, OPTGROUP */ |
185 | | { TidyAttr_LANG, "lang", CH_LANG }, |
186 | | { TidyAttr_LANGUAGE, "language", CH_PCDATA }, /* SCRIPT */ |
187 | | { TidyAttr_LAST_MODIFIED, "last_modified", CH_PCDATA }, /* A */ |
188 | | { TidyAttr_LAST_VISIT, "last_visit", CH_PCDATA }, /* A */ |
189 | | { TidyAttr_LEFTMARGIN, "leftmargin", CH_NUMBER }, /* used on BODY */ |
190 | | { TidyAttr_LINK, "link", CH_COLOR }, /* BODY */ |
191 | | { TidyAttr_LONGDESC, "longdesc", CH_URL }, /* IMG */ |
192 | | { TidyAttr_LOWSRC, "lowsrc", CH_URL }, /* IMG */ |
193 | | { TidyAttr_MARGINHEIGHT, "marginheight", CH_NUMBER }, /* FRAME, IFRAME, BODY */ |
194 | | { TidyAttr_MARGINWIDTH, "marginwidth", CH_NUMBER }, /* ditto */ |
195 | | { TidyAttr_MAXLENGTH, "maxlength", CH_NUMBER }, /* INPUT */ |
196 | | { TidyAttr_MEDIA, "media", CH_MEDIA }, /* STYLE, LINK */ |
197 | | { TidyAttr_METHOD, "method", CH_FSUBMIT }, /* FORM: get or post */ |
198 | | { TidyAttr_MULTIPLE, "multiple", CH_BOOL }, /* SELECT */ |
199 | | { TidyAttr_NAME, "name", CH_NAME }, |
200 | | { TidyAttr_NOHREF, "nohref", CH_BOOL }, /* AREA */ |
201 | | { TidyAttr_NORESIZE, "noresize", CH_BOOL }, /* FRAME */ |
202 | | { TidyAttr_NOSHADE, "noshade", CH_BOOL }, /* HR */ |
203 | | { TidyAttr_NOWRAP, "nowrap", CH_BOOL }, /* table cells */ |
204 | | { TidyAttr_OBJECT, "object", CH_PCDATA }, /* APPLET */ |
205 | | { TidyAttr_OnAFTERUPDATE, "onafterupdate", CH_SCRIPT }, |
206 | | { TidyAttr_OnBEFOREUNLOAD, "onbeforeunload", CH_SCRIPT }, |
207 | | { TidyAttr_OnBEFOREUPDATE, "onbeforeupdate", CH_SCRIPT }, |
208 | | { TidyAttr_OnBLUR, "onblur", CH_SCRIPT }, /* event */ |
209 | | { TidyAttr_OnCHANGE, "onchange", CH_SCRIPT }, /* event */ |
210 | | { TidyAttr_OnCLICK, "onclick", CH_SCRIPT }, /* event */ |
211 | | { TidyAttr_OnDATAAVAILABLE, "ondataavailable", CH_SCRIPT }, /* object, applet */ |
212 | | { TidyAttr_OnDATASETCHANGED, "ondatasetchanged", CH_SCRIPT }, /* object, applet */ |
213 | | { TidyAttr_OnDATASETCOMPLETE, "ondatasetcomplete", CH_SCRIPT }, |
214 | | { TidyAttr_OnDBLCLICK, "ondblclick", CH_SCRIPT }, /* event */ |
215 | | { TidyAttr_OnERRORUPDATE, "onerrorupdate", CH_SCRIPT }, /* form fields */ |
216 | | { TidyAttr_OnFOCUS, "onfocus", CH_SCRIPT }, /* event */ |
217 | | { TidyAttr_OnKEYDOWN, "onkeydown", CH_SCRIPT }, /* event */ |
218 | | { TidyAttr_OnKEYPRESS, "onkeypress", CH_SCRIPT }, /* event */ |
219 | | { TidyAttr_OnKEYUP, "onkeyup", CH_SCRIPT }, /* event */ |
220 | | { TidyAttr_OnLOAD, "onload", CH_SCRIPT }, /* event */ |
221 | | { TidyAttr_OnMOUSEDOWN, "onmousedown", CH_SCRIPT }, /* event */ |
222 | | { TidyAttr_OnMOUSEMOVE, "onmousemove", CH_SCRIPT }, /* event */ |
223 | | { TidyAttr_OnMOUSEOUT, "onmouseout", CH_SCRIPT }, /* event */ |
224 | | { TidyAttr_OnMOUSEOVER, "onmouseover", CH_SCRIPT }, /* event */ |
225 | | { TidyAttr_OnMOUSEUP, "onmouseup", CH_SCRIPT }, /* event */ |
226 | | { TidyAttr_OnRESET, "onreset", CH_SCRIPT }, /* event */ |
227 | | { TidyAttr_OnROWENTER, "onrowenter", CH_SCRIPT }, /* form fields */ |
228 | | { TidyAttr_OnROWEXIT, "onrowexit", CH_SCRIPT }, /* form fields */ |
229 | | { TidyAttr_OnSELECT, "onselect", CH_SCRIPT }, /* event */ |
230 | | { TidyAttr_OnSUBMIT, "onsubmit", CH_SCRIPT }, /* event */ |
231 | | { TidyAttr_OnUNLOAD, "onunload", CH_SCRIPT }, /* event */ |
232 | | { TidyAttr_PROFILE, "profile", CH_URL }, /* HEAD */ |
233 | | { TidyAttr_PROMPT, "prompt", CH_PCDATA }, /* ISINDEX */ |
234 | | { TidyAttr_RBSPAN, "rbspan", CH_NUMBER }, /* ruby markup */ |
235 | | { TidyAttr_READONLY, "readonly", CH_BOOL }, /* form fields */ |
236 | | { TidyAttr_REL, "rel", CH_LINKTYPES }, |
237 | | { TidyAttr_REV, "rev", CH_LINKTYPES }, |
238 | | { TidyAttr_RIGHTMARGIN, "rightmargin", CH_NUMBER }, /* used on BODY */ |
239 | | { TidyAttr_ROLE, "role", CH_PCDATA }, |
240 | | { TidyAttr_ROWS, "rows", CH_NUMBER }, /* TEXTAREA */ |
241 | | { TidyAttr_ROWSPAN, "rowspan", CH_NUMBER }, /* table cells */ |
242 | | { TidyAttr_RULES, "rules", CH_TRULES }, /* TABLE */ |
243 | | { TidyAttr_SCHEME, "scheme", CH_PCDATA }, /* META */ |
244 | | { TidyAttr_SCOPE, "scope", CH_SCOPE }, /* table cells */ |
245 | | { TidyAttr_SCROLLING, "scrolling", CH_SCROLL }, /* yes, no or auto */ |
246 | | { TidyAttr_SELECTED, "selected", CH_BOOL }, /* OPTION */ |
247 | | { TidyAttr_SHAPE, "shape", CH_SHAPE }, /* AREA, A */ |
248 | | { TidyAttr_SHOWGRID, "showgrid", CH_BOOL }, /* TABLE Adobe golive */ |
249 | | { TidyAttr_SHOWGRIDX, "showgridx", CH_BOOL }, /* TABLE Adobe golive*/ |
250 | | { TidyAttr_SHOWGRIDY, "showgridy", CH_BOOL }, /* TABLE Adobe golive*/ |
251 | | { TidyAttr_SIZE, "size", CH_NUMBER }, /* HR, FONT, BASEFONT, SELECT */ |
252 | | { TidyAttr_SPAN, "span", CH_NUMBER }, /* COL, COLGROUP */ |
253 | | { TidyAttr_SRC, "src", CH_URL }, /* IMG, FRAME, IFRAME */ |
254 | | { TidyAttr_SRCSET, "srcset", CH_PCDATA }, /* IMG (HTML5) */ |
255 | | { TidyAttr_STANDBY, "standby", CH_PCDATA }, /* OBJECT */ |
256 | | { TidyAttr_START, "start", CH_NUMBER }, /* OL */ |
257 | | { TidyAttr_STYLE, "style", CH_PCDATA }, |
258 | | { TidyAttr_SUMMARY, "summary", CH_PCDATA }, /* TABLE */ |
259 | | { TidyAttr_TABINDEX, "tabindex", CH_NUMBER }, /* fields, OBJECT and A */ |
260 | | { TidyAttr_TARGET, "target", CH_TARGET }, /* names a frame/window */ |
261 | | { TidyAttr_TEXT, "text", CH_COLOR }, /* BODY */ |
262 | | { TidyAttr_TITLE, "title", CH_PCDATA }, /* text tool tip */ |
263 | | { TidyAttr_TOPMARGIN, "topmargin", CH_NUMBER }, /* used on BODY */ |
264 | | { TidyAttr_TRANSLATE, "translate", CH_BOOL }, /* HTML5 global attribute */ |
265 | | { TidyAttr_TYPE, "type", CH_TYPE }, /* also used by SPACER */ |
266 | | { TidyAttr_USEMAP, "usemap", CH_URL }, /* things with images */ |
267 | | { TidyAttr_VALIGN, "valign", CH_VALIGN }, |
268 | | { TidyAttr_VALUE, "value", CH_PCDATA }, |
269 | | { TidyAttr_VALUETYPE, "valuetype", CH_VTYPE }, /* PARAM: data, ref, object */ |
270 | | { TidyAttr_VERSION, "version", CH_PCDATA }, /* HTML <?xml?> */ |
271 | | { TidyAttr_VLINK, "vlink", CH_COLOR }, /* BODY */ |
272 | | { TidyAttr_VSPACE, "vspace", CH_NUMBER }, /* IMG, OBJECT, APPLET */ |
273 | | { TidyAttr_WIDTH, "width", CH_LENGTH }, /* pixels only for TD/TH */ |
274 | | { TidyAttr_WRAP, "wrap", CH_PCDATA }, /* textarea */ |
275 | | { TidyAttr_XML_LANG, "xml:lang", CH_LANG }, /* XML language */ |
276 | | { TidyAttr_XML_SPACE, "xml:space", CH_PCDATA }, /* XML white space */ |
277 | | |
278 | | /* todo: VERS_ALL is wrong! */ |
279 | | { TidyAttr_XMLNS, "xmlns", CH_PCDATA }, /* name space */ |
280 | | { TidyAttr_EVENT, "event", CH_PCDATA }, /* reserved for <script> */ |
281 | | { TidyAttr_METHODS, "methods", CH_PCDATA }, /* for <a>, never implemented */ |
282 | | { TidyAttr_N, "n", CH_PCDATA }, /* for <nextid> */ |
283 | | { TidyAttr_SDAFORM, "sdaform", CH_PCDATA }, /* SDATA attribute in HTML 2.0 */ |
284 | | { TidyAttr_SDAPREF, "sdapref", CH_PCDATA }, /* SDATA attribute in HTML 2.0 */ |
285 | | { TidyAttr_SDASUFF, "sdasuff", CH_PCDATA }, /* SDATA attribute in HTML 2.0 */ |
286 | | { TidyAttr_URN, "urn", CH_PCDATA }, /* for <a>, never implemented */ |
287 | | |
288 | | /* HTML5 */ |
289 | | { TidyAttr_ASYNC, "async", CH_BOOL }, /* <script src="..." async> */ |
290 | | { TidyAttr_AUTOCOMPLETE, "autocomplete", CH_PCDATA }, |
291 | | { TidyAttr_AUTOFOCUS, "autofocus", CH_PCDATA }, |
292 | | { TidyAttr_AUTOPLAY, "autoplay", CH_PCDATA }, |
293 | | { TidyAttr_CHALLENGE, "challenge", CH_PCDATA }, |
294 | | { TidyAttr_CONTENTEDITABLE, "contenteditable", CH_PCDATA }, |
295 | | { TidyAttr_CONTEXTMENU, "contextmenu", CH_PCDATA }, |
296 | | { TidyAttr_CONTROLS, "controls", CH_PCDATA }, |
297 | | { TidyAttr_CROSSORIGIN, "crossorigin", CH_PCDATA }, |
298 | | { TidyAttr_DEFAULT, "default", CH_PCDATA }, |
299 | | { TidyAttr_DIRNAME, "dirname", CH_PCDATA }, |
300 | | { TidyAttr_DRAGGABLE, "draggable", CH_PCDATA }, |
301 | | { TidyAttr_DROPZONE, "dropzone", CH_PCDATA }, |
302 | | { TidyAttr_FORM, "form", CH_PCDATA }, |
303 | | { TidyAttr_FORMACTION, "formaction", CH_PCDATA }, |
304 | | { TidyAttr_FORMENCTYPE, "formenctype", CH_PCDATA }, |
305 | | { TidyAttr_FORMMETHOD, "formmethod", CH_PCDATA }, |
306 | | { TidyAttr_FORMNOVALIDATE, "formnovalidate", CH_PCDATA }, |
307 | | { TidyAttr_FORMTARGET, "formtarget", CH_PCDATA }, |
308 | | { TidyAttr_HIDDEN, "hidden", CH_PCDATA }, |
309 | | { TidyAttr_HIGH, "high", CH_PCDATA }, |
310 | | { TidyAttr_ICON, "icon", CH_PCDATA }, |
311 | | { TidyAttr_KEYTYPE, "keytype", CH_PCDATA }, |
312 | | { TidyAttr_KIND, "kind", CH_PCDATA }, |
313 | | { TidyAttr_LIST, "list", CH_PCDATA }, |
314 | | { TidyAttr_LOOP, "loop", CH_PCDATA }, |
315 | | { TidyAttr_LOW, "low", CH_PCDATA }, |
316 | | { TidyAttr_MANIFEST, "manifest", CH_PCDATA }, |
317 | | { TidyAttr_MAX, "max", CH_PCDATA }, |
318 | | { TidyAttr_MEDIAGROUP, "mediagroup", CH_PCDATA }, |
319 | | { TidyAttr_MIN, "min", CH_PCDATA }, |
320 | | { TidyAttr_MUTED, "muted", CH_BOOL }, |
321 | | { TidyAttr_NOVALIDATE, "novalidate", CH_PCDATA }, |
322 | | { TidyAttr_OPEN, "open", CH_BOOL }, /* Is. #925 PR #932 */ |
323 | | { TidyAttr_OPTIMUM, "optimum", CH_PCDATA }, |
324 | | { TidyAttr_OnABORT, "onabort", CH_PCDATA }, |
325 | | { TidyAttr_OnAFTERPRINT, "onafterprint", CH_PCDATA }, |
326 | | { TidyAttr_OnBEFOREPRINT, "onbeforeprint", CH_PCDATA }, |
327 | | { TidyAttr_OnCANPLAY, "oncanplay", CH_PCDATA }, |
328 | | { TidyAttr_OnCANPLAYTHROUGH, "oncanplaythrough", CH_PCDATA }, |
329 | | { TidyAttr_OnCONTEXTMENU, "oncontextmenu", CH_PCDATA }, |
330 | | { TidyAttr_OnCUECHANGE, "oncuechange", CH_PCDATA }, |
331 | | { TidyAttr_OnDRAG, "ondrag", CH_PCDATA }, |
332 | | { TidyAttr_OnDRAGEND, "ondragend", CH_PCDATA }, |
333 | | { TidyAttr_OnDRAGENTER, "ondragenter", CH_PCDATA }, |
334 | | { TidyAttr_OnDRAGLEAVE, "ondragleave", CH_PCDATA }, |
335 | | { TidyAttr_OnDRAGOVER, "ondragover", CH_PCDATA }, |
336 | | { TidyAttr_OnDRAGSTART, "ondragstart", CH_PCDATA }, |
337 | | { TidyAttr_OnDROP, "ondrop", CH_PCDATA }, |
338 | | { TidyAttr_OnDURATIONCHANGE, "ondurationchange", CH_PCDATA }, |
339 | | { TidyAttr_OnEMPTIED, "onemptied", CH_PCDATA }, |
340 | | { TidyAttr_OnENDED, "onended", CH_PCDATA }, |
341 | | { TidyAttr_OnERROR, "onerror", CH_PCDATA }, |
342 | | { TidyAttr_OnHASHCHANGE, "onhashchange", CH_PCDATA }, |
343 | | { TidyAttr_OnINPUT, "oninput", CH_PCDATA }, |
344 | | { TidyAttr_OnINVALID, "oninvalid", CH_PCDATA }, |
345 | | { TidyAttr_OnLOADEDDATA, "onloadeddata", CH_PCDATA }, |
346 | | { TidyAttr_OnLOADEDMETADATA, "onloadedmetadata", CH_PCDATA }, |
347 | | { TidyAttr_OnLOADSTART, "onloadstart", CH_PCDATA }, |
348 | | { TidyAttr_OnMESSAGE, "onmessage", CH_PCDATA }, |
349 | | { TidyAttr_OnMOUSEWHEEL, "onmousewheel", CH_PCDATA }, |
350 | | { TidyAttr_OnOFFLINE, "onoffline", CH_PCDATA }, |
351 | | { TidyAttr_OnONLINE, "ononline", CH_PCDATA }, |
352 | | { TidyAttr_OnPAGEHIDE, "onpagehide", CH_PCDATA }, |
353 | | { TidyAttr_OnPAGESHOW, "onpageshow", CH_PCDATA }, |
354 | | { TidyAttr_OnPAUSE, "onpause", CH_PCDATA }, |
355 | | { TidyAttr_OnPLAY, "onplay", CH_PCDATA }, |
356 | | { TidyAttr_OnPLAYING, "onplaying", CH_PCDATA }, |
357 | | { TidyAttr_OnPOPSTATE, "onpopstate", CH_PCDATA }, |
358 | | { TidyAttr_OnPROGRESS, "onprogress", CH_PCDATA }, |
359 | | { TidyAttr_OnRATECHANGE, "onratechange", CH_PCDATA }, |
360 | | { TidyAttr_OnREADYSTATECHANGE, "onreadystatechange", CH_PCDATA }, |
361 | | { TidyAttr_OnREDO, "onredo", CH_PCDATA }, |
362 | | { TidyAttr_OnRESIZE, "onresize", CH_PCDATA }, |
363 | | { TidyAttr_OnSCROLL, "onscroll", CH_PCDATA }, |
364 | | { TidyAttr_OnSEEKED, "onseeked", CH_PCDATA }, |
365 | | { TidyAttr_OnSEEKING, "onseeking", CH_PCDATA }, |
366 | | { TidyAttr_OnSHOW, "onshow", CH_PCDATA }, |
367 | | { TidyAttr_OnSTALLED, "onstalled", CH_PCDATA }, |
368 | | { TidyAttr_OnSTORAGE, "onstorage", CH_PCDATA }, |
369 | | { TidyAttr_OnSUSPEND, "onsuspend", CH_PCDATA }, |
370 | | { TidyAttr_OnTIMEUPDATE, "ontimeupdate", CH_PCDATA }, |
371 | | { TidyAttr_OnUNDO, "onundo", CH_PCDATA }, |
372 | | { TidyAttr_OnVOLUMECHANGE, "onvolumechange", CH_PCDATA }, |
373 | | { TidyAttr_OnWAITING, "onwaiting", CH_PCDATA }, |
374 | | { TidyAttr_PATTERN, "pattern", CH_PCDATA }, |
375 | | { TidyAttr_PLACEHOLDER, "placeholder", CH_PCDATA }, |
376 | | { TidyAttr_PLAYSINLINE, "playsinline", CH_BOOL }, |
377 | | { TidyAttr_POSTER, "poster", CH_PCDATA }, |
378 | | { TidyAttr_PRELOAD, "preload", CH_PCDATA }, |
379 | | { TidyAttr_PUBDATE, "pubdate", CH_PCDATA }, |
380 | | { TidyAttr_RADIOGROUP, "radiogroup", CH_PCDATA }, |
381 | | { TidyAttr_REQUIRED, "required", CH_PCDATA }, |
382 | | { TidyAttr_REVERSED, "reversed", CH_PCDATA }, |
383 | | { TidyAttr_SANDBOX, "sandbox", CH_PCDATA }, |
384 | | { TidyAttr_SCOPED, "scoped", CH_PCDATA }, |
385 | | { TidyAttr_SEAMLESS, "seamless", CH_PCDATA }, |
386 | | { TidyAttr_SIZES, "sizes", CH_PCDATA }, |
387 | | { TidyAttr_SPELLCHECK, "spellcheck", CH_PCDATA }, |
388 | | { TidyAttr_SRCDOC, "srcdoc", CH_PCDATA }, |
389 | | { TidyAttr_SRCLANG, "srclang", CH_PCDATA }, |
390 | | { TidyAttr_STEP, "step", CH_PCDATA }, |
391 | | |
392 | | /* HTML5 Aria Attributes */ |
393 | | { TidyAttr_ARIA_ACTIVEDESCENDANT, "aria-activedescendant", CH_PCDATA }, |
394 | | { TidyAttr_ARIA_ATOMIC, "aria-atomic", CH_PCDATA }, |
395 | | { TidyAttr_ARIA_AUTOCOMPLETE, "aria-autocomplete", CH_PCDATA }, |
396 | | { TidyAttr_ARIA_BUSY, "aria-busy", CH_PCDATA }, |
397 | | { TidyAttr_ARIA_CHECKED, "aria-checked", CH_PCDATA }, |
398 | | { TidyAttr_ARIA_CONTROLS, "aria-controls", CH_PCDATA }, |
399 | | { TidyAttr_ARIA_DESCRIBEDBY, "aria-describedby", CH_PCDATA }, |
400 | | { TidyAttr_ARIA_DISABLED, "aria-disabled", CH_PCDATA }, |
401 | | { TidyAttr_ARIA_DROPEFFECT, "aria-dropeffect", CH_PCDATA }, |
402 | | { TidyAttr_ARIA_EXPANDED, "aria-expanded", CH_PCDATA }, |
403 | | { TidyAttr_ARIA_FLOWTO, "aria-flowto", CH_PCDATA }, |
404 | | { TidyAttr_ARIA_GRABBED, "aria-grabbed", CH_PCDATA }, |
405 | | { TidyAttr_ARIA_HASPOPUP, "aria-haspopup", CH_PCDATA }, |
406 | | { TidyAttr_ARIA_HIDDEN, "aria-hidden", CH_PCDATA }, |
407 | | { TidyAttr_ARIA_INVALID, "aria-invalid", CH_PCDATA }, |
408 | | { TidyAttr_ARIA_LABEL, "aria-label", CH_PCDATA }, |
409 | | { TidyAttr_ARIA_LABELLEDBY, "aria-labelledby", CH_PCDATA }, |
410 | | { TidyAttr_ARIA_LEVEL, "aria-level", CH_PCDATA }, |
411 | | { TidyAttr_ARIA_LIVE, "aria-live", CH_PCDATA }, |
412 | | { TidyAttr_ARIA_MULTILINE, "aria-multiline", CH_PCDATA }, |
413 | | { TidyAttr_ARIA_MULTISELECTABLE, "aria-multiselectable", CH_PCDATA }, |
414 | | { TidyAttr_ARIA_ORIENTATION, "aria-orientation", CH_PCDATA }, |
415 | | { TidyAttr_ARIA_OWNS, "aria-owns", CH_PCDATA }, |
416 | | { TidyAttr_ARIA_POSINSET, "aria-posinset", CH_PCDATA }, |
417 | | { TidyAttr_ARIA_PRESSED, "aria-pressed", CH_PCDATA }, |
418 | | { TidyAttr_ARIA_READONLY, "aria-readonly", CH_PCDATA }, |
419 | | { TidyAttr_ARIA_RELEVANT, "aria-relevant", CH_PCDATA }, |
420 | | { TidyAttr_ARIA_REQUIRED, "aria-required", CH_PCDATA }, |
421 | | { TidyAttr_ARIA_SELECTED, "aria-selected", CH_PCDATA }, |
422 | | { TidyAttr_ARIA_SETSIZE, "aria-setsize", CH_PCDATA }, |
423 | | { TidyAttr_ARIA_SORT, "aria-sort", CH_PCDATA }, |
424 | | { TidyAttr_ARIA_VALUEMAX, "aria-valuemax", CH_PCDATA }, |
425 | | { TidyAttr_ARIA_VALUEMIN, "aria-valuemin", CH_PCDATA }, |
426 | | { TidyAttr_ARIA_VALUENOW, "aria-valuenow", CH_PCDATA }, |
427 | | { TidyAttr_ARIA_VALUETEXT, "aria-valuetext", CH_PCDATA }, |
428 | | { TidyAttr_X, "x", CH_PCDATA }, /* for <svg> */ |
429 | | { TidyAttr_Y, "y", CH_PCDATA }, /* for <svg> */ |
430 | | { TidyAttr_VIEWBOX, "viewbox", CH_PCDATA }, /* for <svg> */ |
431 | | { TidyAttr_PRESERVEASPECTRATIO, "preserveaspectratio", CH_PCDATA }, /* for <svg> */ |
432 | | { TidyAttr_ZOOMANDPAN, "zoomandpan", CH_PCDATA }, /* for <svg> */ |
433 | | { TidyAttr_BASEPROFILE, "baseprofile", CH_PCDATA }, /* for <svg> */ |
434 | | { TidyAttr_CONTENTSCRIPTTYPE, "contentscripttype", CH_PCDATA }, /* for <svg> */ |
435 | | { TidyAttr_CONTENTSTYLETYPE, "contentstyletype", CH_PCDATA }, /* for <svg> */ |
436 | | { TidyAttr_DISPLAY, "display", CH_PCDATA }, /* on MATH tag (html5) */ |
437 | | |
438 | | /* RDFa Attributes */ |
439 | | { TidyAttr_ABOUT, "about", CH_RDFASCURIE }, |
440 | | { TidyAttr_DATATYPE, "datatype", CH_RDFATERM }, |
441 | | { TidyAttr_INLIST, "inlist", CH_BOOL }, |
442 | | { TidyAttr_PREFIX, "prefix", CH_RDFAPREFIX }, |
443 | | { TidyAttr_PROPERTY, "property", CH_RDFATERMS }, |
444 | | { TidyAttr_RESOURCE, "resource", CH_RDFASCURIE }, |
445 | | { TidyAttr_TYPEOF, "typeof", CH_RDFATERMS }, |
446 | | { TidyAttr_VOCAB, "vocab", CH_URL }, |
447 | | |
448 | | { TidyAttr_INTEGRITY, "integrity", CH_PCDATA }, |
449 | | |
450 | | /* Preload spec: https://www.w3.org/TR/preload/ */ |
451 | | { TidyAttr_AS, "as", CH_PCDATA }, |
452 | | |
453 | | /* for xmlns:xlink in <svg> */ |
454 | | { TidyAttr_XMLNSXLINK, "xmlns:xlink", CH_URL }, |
455 | | { TidyAttr_SLOT, "slot", CH_PCDATA }, |
456 | | { TidyAttr_LOADING, "loading", CH_LOADING }, /* IMG, IFRAME */ |
457 | | |
458 | | /* SVG paint attributes (SVG 1.1) */ |
459 | | { TidyAttr_FILL, "fill", CH_SVG }, |
460 | | { TidyAttr_FILLRULE, "fill-rule", CH_SVG }, |
461 | | { TidyAttr_STROKE, "stroke", CH_SVG }, |
462 | | { TidyAttr_STROKEDASHARRAY, "stroke-dasharray", CH_SVG }, |
463 | | { TidyAttr_STROKEDASHOFFSET, "stroke-dashoffset", CH_SVG }, |
464 | | { TidyAttr_STROKELINECAP, "stroke-linecap", CH_SVG }, |
465 | | { TidyAttr_STROKELINEJOIN, "stroke-linejoin", CH_SVG }, |
466 | | { TidyAttr_STROKEMITERLIMIT, "stroke-miterlimit", CH_SVG }, |
467 | | { TidyAttr_STROKEWIDTH, "stroke-width", CH_SVG }, |
468 | | { TidyAttr_COLORINTERPOLATION, "color-interpolation", CH_SVG }, |
469 | | { TidyAttr_COLORRENDERING, "color-rendering", CH_SVG }, |
470 | | { TidyAttr_OPACITY, "opacity", CH_SVG }, |
471 | | { TidyAttr_STROKEOPACITY, "stroke-opacity", CH_SVG }, |
472 | | { TidyAttr_FILLOPACITY, "fill-opacity", CH_SVG }, |
473 | | |
474 | | /* this must be the final entry */ |
475 | | { N_TIDY_ATTRIBS, NULL, NULL } |
476 | | }; |
477 | | |
478 | | static uint AttributeVersions(Node* node, AttVal* attval) |
479 | 21.2k | { |
480 | 21.2k | uint i; |
481 | | |
482 | | /* Override or add to items in attrdict.c */ |
483 | 21.2k | if (attval && attval->attribute) { |
484 | | /* HTML5 data-* attributes can't be added generically; handle here. */ |
485 | 21.0k | if (TY_(tmbstrncmp)(attval->attribute, "data-", 5) == 0) |
486 | 0 | return (XH50 | HT50); |
487 | 21.0k | } |
488 | | /* TODO: maybe this should return VERS_PROPRIETARY instead? */ |
489 | 21.2k | if (!attval || !attval->dict) |
490 | 10.2k | return VERS_UNKNOWN; |
491 | | |
492 | 10.9k | if (!(!node || !node->tag || !node->tag->attrvers)) |
493 | 1.01M | for (i = 0; node->tag->attrvers[i].attribute; ++i) |
494 | 1.00M | if (node->tag->attrvers[i].attribute == attval->dict->id) |
495 | 5.81k | return node->tag->attrvers[i].versions; |
496 | | |
497 | 5.15k | return VERS_PROPRIETARY; |
498 | 10.9k | } |
499 | | |
500 | | |
501 | | /* return the version of the attribute "id" of element "node" */ |
502 | | uint TY_(NodeAttributeVersions)( Node* node, TidyAttrId id ) |
503 | 44 | { |
504 | 44 | uint i; |
505 | | |
506 | 44 | if (!node || !node->tag || !node->tag->attrvers) |
507 | 0 | return VERS_UNKNOWN; |
508 | | |
509 | 2.04k | for (i = 0; node->tag->attrvers[i].attribute; ++i) |
510 | 2.04k | if (node->tag->attrvers[i].attribute == id) |
511 | 44 | return node->tag->attrvers[i].versions; |
512 | | |
513 | 0 | return VERS_UNKNOWN; |
514 | 44 | } |
515 | | |
516 | | /* returns true if the element is a W3C defined element |
517 | | * but the element/attribute combination is not. We're |
518 | | * only defining as "proprietary" items that are not in |
519 | | * the element's AttrVersion structure. |
520 | | */ |
521 | | Bool TY_(AttributeIsProprietary)(Node* node, AttVal* attval) |
522 | 26.5k | { |
523 | 26.5k | if (!node || !attval) |
524 | 0 | return no; |
525 | | |
526 | 26.5k | if (!node->tag) |
527 | 11.5k | return no; |
528 | | |
529 | 14.9k | if (!(node->tag->versions & VERS_ALL)) |
530 | 0 | return no; |
531 | | |
532 | 14.9k | if (AttributeVersions(node, attval) & VERS_ALL) |
533 | 2.79k | return no; |
534 | | |
535 | 12.1k | return yes; |
536 | 14.9k | } |
537 | | |
538 | | /* returns true if the element is a W3C defined element |
539 | | * but the element/attribute combination is not. We're |
540 | | * considering it a mismatch if the document version |
541 | | * does not allow the attribute as called out in its |
542 | | * AttrVersion structure. |
543 | | */ |
544 | | Bool TY_(AttributeIsMismatched)(Node* node, AttVal* attval, TidyDocImpl* doc) |
545 | 0 | { |
546 | 0 | uint doctype; |
547 | | |
548 | 0 | if (!node || !attval) |
549 | 0 | return no; |
550 | | |
551 | 0 | if (!node->tag) |
552 | 0 | return no; |
553 | | |
554 | 0 | if (!(node->tag->versions & VERS_ALL)) |
555 | 0 | return no; |
556 | | |
557 | 0 | doctype = doc->lexer->versionEmitted == 0 ? doc->lexer->doctype : doc->lexer->versionEmitted; |
558 | |
|
559 | 0 | if (AttributeVersions(node, attval) & doctype) |
560 | 0 | return no; |
561 | | |
562 | 0 | return yes; |
563 | 0 | } |
564 | | |
565 | | |
566 | | /* used by CheckColor() */ |
567 | | struct _colors |
568 | | { |
569 | | ctmbstr name; |
570 | | ctmbstr hex; |
571 | | }; |
572 | | |
573 | | static const struct _colors colors[] = |
574 | | { |
575 | | { "black", "#000000" }, |
576 | | { "green", "#008000" }, |
577 | | { "silver", "#C0C0C0" }, |
578 | | { "lime", "#00FF00" }, |
579 | | { "gray", "#808080" }, |
580 | | { "olive", "#808000" }, |
581 | | { "white", "#FFFFFF" }, |
582 | | { "yellow", "#FFFF00" }, |
583 | | { "maroon", "#800000" }, |
584 | | { "navy", "#000080" }, |
585 | | { "red", "#FF0000" }, |
586 | | { "blue", "#0000FF" }, |
587 | | { "purple", "#800080" }, |
588 | | { "teal", "#008080" }, |
589 | | { "fuchsia", "#FF00FF" }, |
590 | | { "aqua", "#00FFFF" }, |
591 | | { NULL, NULL } |
592 | | }; |
593 | | |
/* Additional color names, searched by GetColorCode()/GetColorName()
   only when their use_css_colors argument is set.  Hex codes here are
   lower-case.  Terminated by a { NULL, NULL } sentinel row. */
static const struct _colors extended_colors[] =
{
    { "aliceblue", "#f0f8ff" },
    { "antiquewhite", "#faebd7" },
    { "aquamarine", "#7fffd4" },
    { "azure", "#f0ffff" },
    { "beige", "#f5f5dc" },
    { "bisque", "#ffe4c4" },
    { "blanchedalmond", "#ffebcd" },
    { "blueviolet", "#8a2be2" },
    { "brown", "#a52a2a" },
    { "burlywood", "#deb887" },
    { "cadetblue", "#5f9ea0" },
    { "chartreuse", "#7fff00" },
    { "chocolate", "#d2691e" },
    { "coral", "#ff7f50" },
    { "cornflowerblue", "#6495ed" },
    { "cornsilk", "#fff8dc" },
    { "crimson", "#dc143c" },
    { "cyan", "#00ffff" },
    { "darkblue", "#00008b" },
    { "darkcyan", "#008b8b" },
    { "darkgoldenrod", "#b8860b" },
    { "darkgray", "#a9a9a9" },
    { "darkgreen", "#006400" },
    { "darkgrey", "#a9a9a9" },
    { "darkkhaki", "#bdb76b" },
    { "darkmagenta", "#8b008b" },
    { "darkolivegreen", "#556b2f" },
    { "darkorange", "#ff8c00" },
    { "darkorchid", "#9932cc" },
    { "darkred", "#8b0000" },
    { "darksalmon", "#e9967a" },
    { "darkseagreen", "#8fbc8f" },
    { "darkslateblue", "#483d8b" },
    { "darkslategray", "#2f4f4f" },
    { "darkslategrey", "#2f4f4f" },
    { "darkturquoise", "#00ced1" },
    { "darkviolet", "#9400d3" },
    { "deeppink", "#ff1493" },
    { "deepskyblue", "#00bfff" },
    { "dimgray", "#696969" },
    { "dimgrey", "#696969" },
    { "dodgerblue", "#1e90ff" },
    { "firebrick", "#b22222" },
    { "floralwhite", "#fffaf0" },
    { "forestgreen", "#228b22" },
    { "gainsboro", "#dcdcdc" },
    { "ghostwhite", "#f8f8ff" },
    { "gold", "#ffd700" },
    { "goldenrod", "#daa520" },
    { "greenyellow", "#adff2f" },
    { "grey", "#808080" },
    { "honeydew", "#f0fff0" },
    { "hotpink", "#ff69b4" },
    { "indianred", "#cd5c5c" },
    { "indigo", "#4b0082" },
    { "ivory", "#fffff0" },
    { "khaki", "#f0e68c" },
    { "lavender", "#e6e6fa" },
    { "lavenderblush", "#fff0f5" },
    { "lawngreen", "#7cfc00" },
    { "lemonchiffon", "#fffacd" },
    { "lightblue", "#add8e6" },
    { "lightcoral", "#f08080" },
    { "lightcyan", "#e0ffff" },
    { "lightgoldenrodyellow", "#fafad2" },
    { "lightgray", "#d3d3d3" },
    { "lightgreen", "#90ee90" },
    { "lightgrey", "#d3d3d3" },
    { "lightpink", "#ffb6c1" },
    { "lightsalmon", "#ffa07a" },
    { "lightseagreen", "#20b2aa" },
    { "lightskyblue", "#87cefa" },
    { "lightslategray", "#778899" },
    { "lightslategrey", "#778899" },
    { "lightsteelblue", "#b0c4de" },
    { "lightyellow", "#ffffe0" },
    { "limegreen", "#32cd32" },
    { "linen", "#faf0e6" },
    { "magenta", "#ff00ff" },
    { "mediumaquamarine", "#66cdaa" },
    { "mediumblue", "#0000cd" },
    { "mediumorchid", "#ba55d3" },
    { "mediumpurple", "#9370db" },
    { "mediumseagreen", "#3cb371" },
    { "mediumslateblue", "#7b68ee" },
    { "mediumspringgreen", "#00fa9a" },
    { "mediumturquoise", "#48d1cc" },
    { "mediumvioletred", "#c71585" },
    { "midnightblue", "#191970" },
    { "mintcream", "#f5fffa" },
    { "mistyrose", "#ffe4e1" },
    { "moccasin", "#ffe4b5" },
    { "navajowhite", "#ffdead" },
    { "oldlace", "#fdf5e6" },
    { "olivedrab", "#6b8e23" },
    { "orange", "#ffa500" },
    { "orangered", "#ff4500" },
    { "orchid", "#da70d6" },
    { "palegoldenrod", "#eee8aa" },
    { "palegreen", "#98fb98" },
    { "paleturquoise", "#afeeee" },
    { "palevioletred", "#db7093" },
    { "papayawhip", "#ffefd5" },
    { "peachpuff", "#ffdab9" },
    { "peru", "#cd853f" },
    { "pink", "#ffc0cb" },
    { "plum", "#dda0dd" },
    { "powderblue", "#b0e0e6" },
    { "rebeccapurple", "#663399" },
    { "rosybrown", "#bc8f8f" },
    { "royalblue", "#4169e1" },
    { "saddlebrown", "#8b4513" },
    { "salmon", "#fa8072" },
    { "sandybrown", "#f4a460" },
    { "seagreen", "#2e8b57" },
    { "seashell", "#fff5ee" },
    { "sienna", "#a0522d" },
    { "skyblue", "#87ceeb" },
    { "slateblue", "#6a5acd" },
    { "slategray", "#708090" },
    { "slategrey", "#708090" },
    { "snow", "#fffafa" },
    { "springgreen", "#00ff7f" },
    { "steelblue", "#4682b4" },
    { "tan", "#d2b48c" },
    { "thistle", "#d8bfd8" },
    { "tomato", "#ff6347" },
    { "turquoise", "#40e0d0" },
    { "violet", "#ee82ee" },
    { "wheat", "#f5deb3" },
    { "whitesmoke", "#f5f5f5" },
    { "yellowgreen", "#9acd32" },
    { NULL, NULL }
};
730 | | |
731 | | static ctmbstr GetColorCode(ctmbstr name, Bool use_css_colors) |
732 | 9 | { |
733 | 9 | uint i; |
734 | | |
735 | 153 | for (i = 0; colors[i].name; ++i) |
736 | 144 | if (TY_(tmbstrcasecmp)(name, colors[i].name) == 0) |
737 | 0 | return colors[i].hex; |
738 | | |
739 | 9 | if (use_css_colors) |
740 | 1.06k | for (i = 0; extended_colors[i].name; ++i) |
741 | 1.05k | if (TY_(tmbstrcasecmp)(name, extended_colors[i].name) == 0) |
742 | 0 | return extended_colors[i].hex; |
743 | | |
744 | 9 | return NULL; |
745 | 9 | } |
746 | | |
747 | | static ctmbstr GetColorName(ctmbstr code, Bool use_css_colors) |
748 | 0 | { |
749 | 0 | uint i; |
750 | |
|
751 | 0 | for (i = 0; colors[i].name; ++i) |
752 | 0 | if (TY_(tmbstrcasecmp)(code, colors[i].hex) == 0) |
753 | 0 | return colors[i].name; |
754 | | |
755 | 0 | if (use_css_colors) |
756 | 0 | for (i = 0; extended_colors[i].name; ++i) |
757 | 0 | if (TY_(tmbstrcasecmp)(code, extended_colors[i].hex) == 0) |
758 | 0 | return extended_colors[i].name; |
759 | | |
760 | 0 | return NULL; |
761 | 0 | } |
762 | | |
763 | | static uint attrsHash(ctmbstr s) |
764 | 88.2k | { |
765 | 88.2k | uint hashval; |
766 | | |
767 | 543k | for (hashval = 0; *s != '\0'; s++) |
768 | 455k | hashval = *s + 31*hashval; |
769 | | |
770 | 88.2k | return hashval % ATTRIBUTE_HASH_SIZE; |
771 | 88.2k | } |
772 | | |
773 | | static const Attribute *attrsInstall(TidyDocImpl* doc, TidyAttribImpl * attribs, |
774 | | const Attribute* old) |
775 | 307 | { |
776 | 307 | AttrHash *np; |
777 | 307 | uint hashval; |
778 | | |
779 | 307 | if (old) |
780 | 307 | { |
781 | 307 | np = (AttrHash *)TidyDocAlloc(doc, sizeof(*np)); |
782 | 307 | np->attr = old; |
783 | | |
784 | 307 | hashval = attrsHash(old->name); |
785 | 307 | np->next = attribs->hashtab[hashval]; |
786 | 307 | attribs->hashtab[hashval] = np; |
787 | 307 | } |
788 | | |
789 | 307 | return old; |
790 | 307 | } |
791 | | |
792 | | static void attrsRemoveFromHash( TidyDocImpl* doc, TidyAttribImpl *attribs, |
793 | | ctmbstr s ) |
794 | 0 | { |
795 | 0 | uint h = attrsHash(s); |
796 | 0 | AttrHash *p, *prev = NULL; |
797 | 0 | for (p = attribs->hashtab[h]; p && p->attr; p = p->next) |
798 | 0 | { |
799 | 0 | if (TY_(tmbstrcmp)(s, p->attr->name) == 0) |
800 | 0 | { |
801 | 0 | AttrHash* next = p->next; |
802 | 0 | if ( prev ) |
803 | 0 | prev->next = next; |
804 | 0 | else |
805 | 0 | attribs->hashtab[h] = next; |
806 | 0 | TidyDocFree(doc, p); |
807 | 0 | return; |
808 | 0 | } |
809 | 0 | prev = p; |
810 | 0 | } |
811 | 0 | } |
812 | | |
813 | | static void attrsEmptyHash( TidyDocImpl* doc, TidyAttribImpl * attribs ) |
814 | 52 | { |
815 | 52 | AttrHash *dict, *next; |
816 | 52 | uint i; |
817 | | |
818 | 9.30k | for (i = 0; i < ATTRIBUTE_HASH_SIZE; ++i) |
819 | 9.25k | { |
820 | 9.25k | dict = attribs->hashtab[i]; |
821 | | |
822 | 9.56k | while(dict) |
823 | 307 | { |
824 | 307 | next = dict->next; |
825 | 307 | TidyDocFree(doc, dict); |
826 | 307 | dict = next; |
827 | 307 | } |
828 | | |
829 | 9.25k | attribs->hashtab[i] = NULL; |
830 | 9.25k | } |
831 | 52 | } |
832 | | |
833 | | static const Attribute* attrsLookup(TidyDocImpl* doc, |
834 | | TidyAttribImpl* ARG_UNUSED(attribs), |
835 | | ctmbstr atnam) |
836 | 88.1k | { |
837 | 88.1k | const Attribute *np; |
838 | 88.1k | const AttrHash *p; |
839 | | |
840 | 88.1k | if (!atnam) |
841 | 146 | return NULL; |
842 | | |
843 | 88.3k | for (p = attribs->hashtab[attrsHash(atnam)]; p && p->attr; p = p->next) |
844 | 65.4k | if (TY_(tmbstrcasecmp)(atnam, p->attr->name) == 0) |
845 | 65.0k | return p->attr; |
846 | | |
847 | 7.86M | for (np = attribute_defs; np && np->name; ++np) |
848 | 7.84M | if (TY_(tmbstrcasecmp)(atnam, np->name) == 0) |
849 | 307 | return attrsInstall(doc, attribs, np); |
850 | | |
851 | 22.5k | return NULL; |
852 | 22.8k | } |
853 | | |
854 | | |
855 | | /* Locate attributes by type */ |
856 | | AttVal* TY_(AttrGetById)( Node* node, TidyAttrId id ) |
857 | 437k | { |
858 | 437k | AttVal* av; |
859 | 524k | for ( av = node->attributes; av; av = av->next ) |
860 | 87.4k | { |
861 | 87.4k | if ( AttrIsId(av, id) ) |
862 | 268 | return av; |
863 | 87.4k | } |
864 | 436k | return NULL; |
865 | 437k | } |
866 | | |
867 | | /* public method for finding attribute definition by name */ |
868 | | const Attribute* TY_(FindAttribute)( TidyDocImpl* doc, AttVal *attval ) |
869 | 35.6k | { |
870 | 35.6k | if ( attval ) |
871 | 35.6k | return attrsLookup( doc, &doc->attribs, attval->attribute ); |
872 | 0 | return NULL; |
873 | 35.6k | } |
874 | | |
875 | | AttVal* TY_(GetAttrByName)( Node *node, ctmbstr name ) |
876 | 15.4k | { |
877 | 15.4k | AttVal *attr; |
878 | 15.6k | for (attr = node->attributes; attr != NULL; attr = attr->next) |
879 | 348 | { |
880 | 348 | if (attr->attribute && TY_(tmbstrcmp)(attr->attribute, name) == 0) |
881 | 176 | break; |
882 | 348 | } |
883 | 15.4k | return attr; |
884 | 15.4k | } |
885 | | |
886 | | void TY_(DropAttrByName)( TidyDocImpl* doc, Node *node, ctmbstr name ) |
887 | 0 | { |
888 | 0 | AttVal *attr, *prev = NULL, *next; |
889 | |
|
890 | 0 | for (attr = node->attributes; attr != NULL; prev = attr, attr = next) |
891 | 0 | { |
892 | 0 | next = attr->next; |
893 | |
|
894 | 0 | if (attr->attribute && TY_(tmbstrcmp)(attr->attribute, name) == 0) |
895 | 0 | { |
896 | 0 | if (prev) |
897 | 0 | prev->next = next; |
898 | 0 | else |
899 | 0 | node->attributes = next; |
900 | |
|
901 | 0 | TY_(FreeAttribute)( doc, attr ); |
902 | 0 | break; |
903 | 0 | } |
904 | 0 | } |
905 | 0 | } |
906 | | |
907 | | AttVal* TY_(AddAttribute)( TidyDocImpl* doc, |
908 | | Node *node, ctmbstr name, ctmbstr value ) |
909 | 452 | { |
910 | 452 | AttVal *av = TY_(NewAttribute)(doc); |
911 | 452 | av->delim = '"'; |
912 | 452 | av->attribute = TY_(tmbstrdup)(doc->allocator, name); |
913 | | |
914 | 452 | if (value) |
915 | 342 | av->value = TY_(tmbstrdup)(doc->allocator, value); |
916 | 110 | else |
917 | 110 | av->value = NULL; |
918 | | |
919 | 452 | av->dict = attrsLookup(doc, &doc->attribs, name); |
920 | | |
921 | 452 | TY_(InsertAttributeAtEnd)(node, av); |
922 | 452 | return av; |
923 | 452 | } |
924 | | |
925 | | AttVal* TY_(RepairAttrValue)(TidyDocImpl* doc, Node* node, ctmbstr name, ctmbstr value) |
926 | 94 | { |
927 | 94 | AttVal* old = TY_(GetAttrByName)(node, name); |
928 | | |
929 | 94 | if (old) |
930 | 0 | { |
931 | 0 | if (old->value) |
932 | 0 | TidyDocFree(doc, old->value); |
933 | 0 | if (value) |
934 | 0 | old->value = TY_(tmbstrdup)(doc->allocator, value); |
935 | 0 | else |
936 | 0 | old->value = NULL; |
937 | |
|
938 | 0 | return old; |
939 | 0 | } |
940 | 94 | else |
941 | 94 | return TY_(AddAttribute)(doc, node, name, value); |
942 | 94 | } |
943 | | |
944 | | |
945 | | void TY_(FreeAttrPriorityList)( TidyDocImpl* doc ) |
946 | 52 | { |
947 | 52 | PriorityAttribs *priorities = &(doc->attribs.priorityAttribs); |
948 | | |
949 | 52 | if ( priorities->list ) |
950 | 0 | { |
951 | 0 | uint i = 0; |
952 | 0 | while ( priorities->list[i] != NULL ) |
953 | 0 | { |
954 | 0 | TidyFree( doc->allocator, priorities->list[i] ); |
955 | 0 | i++; |
956 | 0 | } |
957 | |
|
958 | 0 | TidyFree( doc->allocator, priorities->list ); |
959 | 0 | } |
960 | 52 | } |
961 | | |
962 | | |
963 | | void TY_(DefinePriorityAttribute)(TidyDocImpl* doc, ctmbstr name) |
964 | 0 | { |
965 | 0 | enum { capacity = 10 }; |
966 | 0 | PriorityAttribs *priorities = &(doc->attribs.priorityAttribs); |
967 | |
|
968 | 0 | if ( !priorities->list ) |
969 | 0 | { |
970 | 0 | priorities->list = TidyAlloc(doc->allocator, sizeof(ctmbstr) * capacity ); |
971 | 0 | priorities->list[0] = NULL; |
972 | 0 | priorities->capacity = capacity; |
973 | 0 | priorities->count = 0; |
974 | 0 | } |
975 | |
|
976 | 0 | if ( priorities->count >= priorities->capacity ) |
977 | 0 | { |
978 | 0 | priorities->capacity = priorities->capacity * 2; |
979 | 0 | priorities->list = TidyRealloc(doc->allocator, priorities->list, sizeof(tmbstr) * priorities->capacity + 1 ); |
980 | 0 | } |
981 | |
|
982 | 0 | priorities->list[priorities->count] = TY_(tmbstrdup)( doc->allocator, name); |
983 | 0 | priorities->count++; |
984 | 0 | priorities->list[priorities->count] = NULL; |
985 | 0 | } |
986 | | |
987 | | |
988 | | TidyIterator TY_(getPriorityAttrList)( TidyDocImpl* doc ) |
989 | 0 | { |
990 | 0 | PriorityAttribs *priorities = &(doc->attribs.priorityAttribs); |
991 | 0 | size_t result = priorities->count > 0 ? 1 : 0; |
992 | |
|
993 | 0 | return (TidyIterator) result; |
994 | 0 | } |
995 | | |
996 | | |
997 | | ctmbstr TY_(getNextPriorityAttr)( TidyDocImpl* doc, TidyIterator* iter ) |
998 | 0 | { |
999 | 0 | PriorityAttribs *priorities = &(doc->attribs.priorityAttribs); |
1000 | 0 | size_t index; |
1001 | 0 | ctmbstr result = NULL; |
1002 | 0 | assert( iter != NULL ); |
1003 | 0 | index = (size_t)*iter; |
1004 | |
|
1005 | 0 | if ( index > 0 && index <= priorities->count ) |
1006 | 0 | { |
1007 | 0 | result = priorities->list[index-1]; |
1008 | 0 | index++; |
1009 | 0 | } |
1010 | 0 | *iter = (TidyIterator) ( index <= priorities->count ? index : (size_t)0 ); |
1011 | |
|
1012 | 0 | return result; |
1013 | 0 | } |
1014 | | |
1015 | | |
1016 | | static Bool CheckAttrType( TidyDocImpl* doc, |
1017 | | ctmbstr attrname, AttrCheck type ) |
1018 | 51.9k | { |
1019 | 51.9k | const Attribute* np = attrsLookup( doc, &doc->attribs, attrname ); |
1020 | 51.9k | return (Bool)( np && np->attrchk == type ); |
1021 | 51.9k | } |
1022 | | |
1023 | | Bool TY_(IsUrl)( TidyDocImpl* doc, ctmbstr attrname ) |
1024 | 51.9k | { |
1025 | 51.9k | return CheckAttrType( doc, attrname, CH_URL ); |
1026 | 51.9k | } |
1027 | | |
1028 | | /* |
1029 | | Bool IsBool( TidyDocImpl* doc, ctmbstr attrname ) |
1030 | | { |
1031 | | return CheckAttrType( doc, attrname, CH_BOOL ); |
1032 | | } |
1033 | | */ |
1034 | | |
1035 | | Bool TY_(IsScript)( TidyDocImpl* doc, ctmbstr attrname ) |
1036 | 26 | { |
1037 | 26 | return CheckAttrType( doc, attrname, CH_SCRIPT ); |
1038 | 26 | } |
1039 | | |
1040 | | /* may id or name serve as anchor? */ |
1041 | | Bool TY_(IsAnchorElement)( TidyDocImpl* ARG_UNUSED(doc), Node* node) |
1042 | 296k | { |
1043 | 296k | TidyTagId tid = TagId( node ); |
1044 | 296k | if ( tid == TidyTag_A || |
1045 | 296k | tid == TidyTag_APPLET || |
1046 | 296k | tid == TidyTag_FORM || |
1047 | 296k | tid == TidyTag_FRAME || |
1048 | 296k | tid == TidyTag_IFRAME || |
1049 | 296k | tid == TidyTag_IMG || |
1050 | 296k | tid == TidyTag_MAP ) |
1051 | 3.52k | return yes; |
1052 | | |
1053 | 293k | return no; |
1054 | 296k | } |
1055 | | |
1056 | | /* |
1057 | | In CSS1, selectors can contain only the characters A-Z, 0-9, |
1058 | | and Unicode characters 161-255, plus dash (-); they cannot start |
1059 | | with a dash or a digit; they can also contain escaped characters |
1060 | | and any Unicode character as a numeric code (see next item). |
1061 | | |
1062 | | The backslash followed by at most four hexadecimal digits |
1063 | | (0..9A..F) stands for the Unicode character with that number. |
1064 | | |
1065 | | Any character except a hexadecimal digit can be escaped to remove |
1066 | | its special meaning, by putting a backslash in front. |
1067 | | |
1068 | | #508936 - CSS class naming for -clean option |
1069 | | */ |
1070 | | Bool TY_(IsCSS1Selector)( ctmbstr buf ) |
1071 | 0 | { |
1072 | 0 | Bool valid = yes; |
1073 | 0 | int esclen = 0; |
1074 | 0 | byte c; |
1075 | 0 | int pos; |
1076 | |
|
1077 | 0 | for ( pos=0; valid && (c = *buf++); ++pos ) |
1078 | 0 | { |
1079 | 0 | if ( c == '\\' ) |
1080 | 0 | { |
1081 | 0 | esclen = 1; /* ab\555\444 is 4 chars {'a', 'b', \555, \444} */ |
1082 | 0 | } |
1083 | 0 | else if ( isdigit( c ) ) |
1084 | 0 | { |
1085 | | /* Digit not 1st, unless escaped (Max length "\112F") */ |
1086 | 0 | if ( esclen > 0 ) |
1087 | 0 | valid = ( ++esclen < 6 ); |
1088 | 0 | if ( valid ) |
1089 | 0 | valid = ( pos>0 || esclen>0 ); |
1090 | 0 | } |
1091 | 0 | else |
1092 | 0 | { |
1093 | 0 | valid = ( |
1094 | 0 | esclen > 0 /* Escaped? Anything goes. */ |
1095 | 0 | || ( pos>0 && c == '-' ) /* Dash cannot be 1st char */ |
1096 | | || isalpha(c) /* a-z, A-Z anywhere */ |
1097 | 0 | || ( c >= 161 ) /* Unicode 161-255 anywhere */ |
1098 | 0 | ); |
1099 | 0 | esclen = 0; |
1100 | 0 | } |
1101 | 0 | } |
1102 | 0 | return valid; |
1103 | 0 | } |
1104 | | |
1105 | | /* free single anchor */ |
1106 | | static void FreeAnchor(TidyDocImpl* doc, Anchor *a) |
1107 | 118 | { |
1108 | 118 | if ( a ) |
1109 | 118 | TidyDocFree( doc, a->name ); |
1110 | 118 | TidyDocFree( doc, a ); |
1111 | 118 | } |
1112 | | |
1113 | | static uint anchorNameHash(ctmbstr s) |
1114 | 1.38k | { |
1115 | 1.38k | uint hashval = 0; |
1116 | | /* Issue #149 - an inferred name can be null. avoid crash */ |
1117 | 1.38k | if (s) |
1118 | 1.34k | { |
1119 | 41.4k | for ( ; *s != '\0'; s++) { |
1120 | 40.1k | tmbchar c = TY_(ToLower)( *s ); |
1121 | 40.1k | hashval = c + 31*hashval; |
1122 | 40.1k | } |
1123 | 1.34k | } |
1124 | 1.38k | return hashval % ANCHOR_HASH_SIZE; |
1125 | 1.38k | } |
1126 | | |
1127 | | /*\ |
1128 | | * New Service for HTML5 |
1129 | | * Issue #185 - Treat elements ids as case-sensitive |
1130 | | * if in HTML5 modes, make hash of value AS IS! |
1131 | | \*/ |
1132 | | static uint anchorNameHash5(ctmbstr s) |
1133 | 0 | { |
1134 | 0 | uint hashval = 0; |
1135 | | /* Issue #149 - an inferred name can be null. avoid crash */ |
1136 | 0 | if (s) |
1137 | 0 | { |
1138 | 0 | for ( ; *s != '\0'; s++) { |
1139 | 0 | tmbchar c = *s; |
1140 | 0 | hashval = c + 31*hashval; |
1141 | 0 | } |
1142 | 0 | } |
1143 | 0 | return hashval % ANCHOR_HASH_SIZE; |
1144 | 0 | } |
1145 | | |
1146 | | |
1147 | | /*\ |
1148 | | * removes anchor for specific node |
1149 | | * Issue #185 - Treat elements ids as case-sensitive |
1150 | | * if in HTML5 modes, make hash of value AS IS! |
1151 | | \*/ |
1152 | | void TY_(RemoveAnchorByNode)( TidyDocImpl* doc, ctmbstr name, Node *node ) |
1153 | 104 | { |
1154 | 104 | TidyAttribImpl* attribs = &doc->attribs; |
1155 | 104 | Anchor *delme = NULL, *curr, *prev = NULL; |
1156 | 104 | uint h; |
1157 | 104 | if (TY_(HTMLVersion)(doc) == HT50) |
1158 | 0 | h = anchorNameHash5(name); |
1159 | 104 | else |
1160 | 104 | h = anchorNameHash(name); |
1161 | | |
1162 | 104 | for ( curr=attribs->anchor_hash[h]; curr!=NULL; curr=curr->next ) |
1163 | 36 | { |
1164 | 36 | if ( curr->node == node ) |
1165 | 36 | { |
1166 | 36 | if ( prev ) |
1167 | 0 | prev->next = curr->next; |
1168 | 36 | else |
1169 | 36 | attribs->anchor_hash[h] = curr->next; |
1170 | 36 | delme = curr; |
1171 | 36 | break; |
1172 | 36 | } |
1173 | 0 | prev = curr; |
1174 | 0 | } |
1175 | 104 | FreeAnchor( doc, delme ); |
1176 | 104 | } |
1177 | | |
1178 | | /* initialize new anchor |
1179 | | Is. #726 & #185 - HTML5 is case-sensitive |
1180 | | */ |
1181 | | static Anchor* NewAnchor( TidyDocImpl* doc, ctmbstr name, Node* node ) |
1182 | 50 | { |
1183 | 50 | Anchor *a = (Anchor*) TidyDocAlloc( doc, sizeof(Anchor) ); |
1184 | | |
1185 | 50 | a->name = TY_(tmbstrdup)( doc->allocator, name ); |
1186 | 50 | if (!TY_(IsHTML5Mode)(doc)) /* Is. #726 - if NOT HTML5, to lowercase */ |
1187 | 50 | a->name = TY_(tmbstrtolower)(a->name); |
1188 | 50 | a->node = node; |
1189 | 50 | a->next = NULL; |
1190 | | |
1191 | 50 | return a; |
1192 | 50 | } |
1193 | | |
1194 | | /*\ |
1195 | | * add new anchor to namespace |
1196 | | * Issue #185 - Treat elements ids as case-sensitive |
1197 | | * if in HTML5 modes, make hash of value AS IS! |
1198 | | \*/ |
1199 | | static Anchor* AddAnchor( TidyDocImpl* doc, ctmbstr name, Node *node ) |
1200 | 50 | { |
1201 | 50 | TidyAttribImpl* attribs = &doc->attribs; |
1202 | 50 | Anchor *a = NewAnchor( doc, name, node ); |
1203 | 50 | uint h; |
1204 | 50 | if (TY_(HTMLVersion)(doc) == HT50) |
1205 | 0 | h = anchorNameHash5(name); |
1206 | 50 | else |
1207 | 50 | h = anchorNameHash(name); |
1208 | | |
1209 | 50 | if ( attribs->anchor_hash[h] == NULL) |
1210 | 42 | attribs->anchor_hash[h] = a; |
1211 | 8 | else |
1212 | 8 | { |
1213 | 8 | Anchor *here = attribs->anchor_hash[h]; |
1214 | 8 | while (here->next) |
1215 | 0 | here = here->next; |
1216 | 8 | here->next = a; |
1217 | 8 | } |
1218 | | |
1219 | 50 | return attribs->anchor_hash[h]; |
1220 | 50 | } |
1221 | | |
1222 | | /*\ |
1223 | | * return node associated with anchor |
1224 | | * Issue #185 - Treat elements ids as case-sensitive |
1225 | | * if in HTML5 modes, make hash of value AS IS! |
1226 | | \*/ |
1227 | | static Node* GetNodeByAnchor( TidyDocImpl* doc, ctmbstr name ) |
1228 | 1.22k | { |
1229 | 1.22k | TidyAttribImpl* attribs = &doc->attribs; |
1230 | 1.22k | Anchor *found; |
1231 | 1.22k | uint h; |
1232 | 1.22k | tmbstr lname = TY_(tmbstrdup)(doc->allocator, name); |
1233 | 1.22k | if (TY_(HTMLVersion)(doc) == HT50) { |
1234 | 0 | h = anchorNameHash5(name); |
1235 | 0 | } |
1236 | 1.22k | else |
1237 | 1.22k | { |
1238 | 1.22k | h = anchorNameHash(name); |
1239 | 1.22k | lname = TY_(tmbstrtolower)(lname); |
1240 | 1.22k | } |
1241 | | |
1242 | 1.22k | for ( found = attribs->anchor_hash[h]; found != NULL; found = found->next ) |
1243 | 1.18k | { |
1244 | 1.18k | if ( TY_(tmbstrcmp)(found->name, lname) == 0 ) |
1245 | 1.18k | break; |
1246 | 1.18k | } |
1247 | | |
1248 | 1.22k | TidyDocFree(doc, lname); |
1249 | 1.22k | if ( found ) |
1250 | 1.18k | return found->node; |
1251 | 42 | return NULL; |
1252 | 1.22k | } |
1253 | | |
1254 | | /* free all anchors */ |
1255 | | void TY_(FreeAnchors)( TidyDocImpl* doc ) |
1256 | 104 | { |
1257 | 104 | TidyAttribImpl* attribs = &doc->attribs; |
1258 | 104 | Anchor* a; |
1259 | 104 | uint h; |
1260 | 106k | for (h = 0; h < ANCHOR_HASH_SIZE; h++) { |
1261 | 106k | while (NULL != (a = attribs->anchor_hash[h]) ) |
1262 | 14 | { |
1263 | 14 | attribs->anchor_hash[h] = a->next; |
1264 | 14 | FreeAnchor(doc, a); |
1265 | 14 | } |
1266 | 106k | } |
1267 | 104 | } |
1268 | | |
1269 | | /* public method for inititializing attribute dictionary */ |
1270 | | void TY_(InitAttrs)( TidyDocImpl* doc ) |
1271 | 52 | { |
1272 | 52 | TidyClearMemory( &doc->attribs, sizeof(TidyAttribImpl) ); |
1273 | | #ifdef _DEBUG |
1274 | | { |
1275 | | /* Attribute ID is index position in Attribute type lookup table */ |
1276 | | uint ix; |
1277 | | for ( ix=0; ix < N_TIDY_ATTRIBS; ++ix ) |
1278 | | { |
1279 | | const Attribute* dict = &attribute_defs[ ix ]; |
1280 | | assert( (uint) dict->id == ix ); |
1281 | | } |
1282 | | } |
1283 | | #endif |
1284 | 52 | } |
1285 | | |
1286 | | /* free all declared attributes */ |
1287 | | static void FreeDeclaredAttributes( TidyDocImpl* doc ) |
1288 | 52 | { |
1289 | 52 | TidyAttribImpl* attribs = &doc->attribs; |
1290 | 52 | Attribute* dict; |
1291 | 52 | while ( NULL != (dict = attribs->declared_attr_list) ) |
1292 | 0 | { |
1293 | 0 | attribs->declared_attr_list = dict->next; |
1294 | 0 | attrsRemoveFromHash( doc, &doc->attribs, dict->name ); |
1295 | 0 | TidyDocFree( doc, dict->name ); |
1296 | 0 | TidyDocFree( doc, dict ); |
1297 | 0 | } |
1298 | 52 | } |
1299 | | |
/* Release the attribute bookkeeping of the document: empty the attribute
   hash table, free all anchors, then free the declared attributes. */
void TY_(FreeAttrTable)( TidyDocImpl* doc )
{
    attrsEmptyHash( doc, &doc->attribs );
    TY_(FreeAnchors)( doc );
    FreeDeclaredAttributes( doc );
}
1306 | | |
1307 | | void TY_(AppendToClassAttr)( TidyDocImpl* doc, AttVal *classattr, ctmbstr classname ) |
1308 | 0 | { |
1309 | 0 | uint len = TY_(tmbstrlen)(classattr->value) + |
1310 | 0 | TY_(tmbstrlen)(classname) + 2; |
1311 | 0 | tmbstr s = (tmbstr) TidyDocAlloc( doc, len ); |
1312 | 0 | s[0] = '\0'; |
1313 | 0 | if (classattr->value) |
1314 | 0 | { |
1315 | 0 | TY_(tmbstrcpy)( s, classattr->value ); |
1316 | 0 | TY_(tmbstrcat)( s, " " ); |
1317 | 0 | } |
1318 | 0 | TY_(tmbstrcat)( s, classname ); |
1319 | 0 | if (classattr->value) |
1320 | 0 | TidyDocFree( doc, classattr->value ); |
1321 | 0 | classattr->value = s; |
1322 | 0 | } |
1323 | | |
1324 | | /* concatenate styles */ |
1325 | | static void AppendToStyleAttr( TidyDocImpl* doc, AttVal *styleattr, ctmbstr styleprop ) |
1326 | 0 | { |
1327 | | /* |
1328 | | this doesn't handle CSS comments and |
1329 | | leading/trailing white-space very well |
1330 | | see https://www.w3.org/TR/css-style-attr/ |
1331 | | */ |
1332 | 0 | uint end = TY_(tmbstrlen)(styleattr->value); |
1333 | |
|
1334 | 0 | if (end >0 && styleattr->value[end - 1] == ';') |
1335 | 0 | { |
1336 | | /* attribute ends with declaration separator */ |
1337 | |
|
1338 | 0 | styleattr->value = (tmbstr) TidyDocRealloc(doc, styleattr->value, |
1339 | 0 | end + TY_(tmbstrlen)(styleprop) + 2); |
1340 | |
|
1341 | 0 | TY_(tmbstrcat)(styleattr->value, " "); |
1342 | 0 | TY_(tmbstrcat)(styleattr->value, styleprop); |
1343 | 0 | } |
1344 | 0 | else if (end >0 && styleattr->value[end - 1] == '}') |
1345 | 0 | { |
1346 | | /* attribute ends with rule set */ |
1347 | |
|
1348 | 0 | styleattr->value = (tmbstr) TidyDocRealloc(doc, styleattr->value, |
1349 | 0 | end + TY_(tmbstrlen)(styleprop) + 6); |
1350 | |
|
1351 | 0 | TY_(tmbstrcat)(styleattr->value, " { "); |
1352 | 0 | TY_(tmbstrcat)(styleattr->value, styleprop); |
1353 | 0 | TY_(tmbstrcat)(styleattr->value, " }"); |
1354 | 0 | } |
1355 | 0 | else |
1356 | 0 | { |
1357 | | /* attribute ends with property value */ |
1358 | |
|
1359 | 0 | styleattr->value = (tmbstr) TidyDocRealloc(doc, styleattr->value, |
1360 | 0 | end + TY_(tmbstrlen)(styleprop) + 3); |
1361 | |
|
1362 | 0 | if (end > 0) |
1363 | 0 | TY_(tmbstrcat)(styleattr->value, "; "); |
1364 | 0 | TY_(tmbstrcat)(styleattr->value, styleprop); |
1365 | 0 | } |
1366 | 0 | } |
1367 | | |
1368 | | /* |
1369 | | the same attribute name can't be used |
1370 | | more than once in each element |
1371 | | */ |
1372 | | static Bool AttrsHaveSameName( AttVal* av1, AttVal* av2) |
1373 | 551 | { |
1374 | 551 | TidyAttrId id1, id2; |
1375 | | |
1376 | 551 | id1 = AttrId(av1); |
1377 | 551 | id2 = AttrId(av2); |
1378 | 551 | if (id1 != TidyAttr_UNKNOWN && id2 != TidyAttr_UNKNOWN) |
1379 | 309 | return AttrsHaveSameId(av1, av2); |
1380 | 242 | if (id1 != TidyAttr_UNKNOWN || id2 != TidyAttr_UNKNOWN) |
1381 | 166 | return no; |
1382 | 76 | if (av1->attribute && av2->attribute) |
1383 | 76 | return TY_(tmbstrcmp)(av1->attribute, av2->attribute) == 0; |
1384 | 0 | return no; |
1385 | 76 | } |
1386 | | |
/* Resolve attributes that occur more than once on `node`.

   Every attribute (`first`) is compared with each attribute after it
   (`second`).  Attributes carrying asp or php content are never treated
   as duplicates.  For a duplicate pair, in this order of precedence:
     - not XML, class attribute, TidyJoinClasses set, both have values:
       the second value is appended to the first and the second removed;
     - not XML, style attribute, TidyJoinStyles set, both have values:
       likewise, using AppendToStyleAttr();
     - TidyDuplicateAttrs is TidyKeepLast: the first one is removed;
     - otherwise: the second one is removed.
   Each removal or join is reported through TY_(ReportAttrError). */
void TY_(RepairDuplicateAttributes)( TidyDocImpl* doc, Node *node, Bool isXml )
{
    AttVal *first;

    /* no increment clause: `first` is advanced by hand because the loop
       body may remove it from the list */
    for (first = node->attributes; first != NULL;)
    {
        AttVal *second;
        Bool firstRedefined = no;   /* set once `first` has been replaced below */

        /* skip attributes that carry asp or php content */
        if (!(first->asp == NULL && first->php == NULL))
        {
            first = first->next;
            continue;
        }

        for (second = first->next; second != NULL;)
        {
            AttVal *temp;

            /* not a duplicate of `first`: move on */
            if (!(second->asp == NULL && second->php == NULL
                  && AttrsHaveSameName(first, second)))
            {
                second = second->next;
                continue;
            }

            /* first and second attribute have same local name */
            /* now determine what to do with this duplicate... */

            if (!isXml
                && attrIsCLASS(first) && cfgBool(doc, TidyJoinClasses)
                && AttrHasValue(first) && AttrHasValue(second))
            {
                /* concatenate classes */

                TY_(AppendToClassAttr)(doc, first, second->value);

                /* remember the successor before `second` is removed */
                temp = second->next;
                TY_(ReportAttrError)( doc, node, second, JOINING_ATTRIBUTE);
                TY_(RemoveAttribute)( doc, node, second );
                second = temp;
            }
            else if (!isXml
                     && attrIsSTYLE(first) && cfgBool(doc, TidyJoinStyles)
                     && AttrHasValue(first) && AttrHasValue(second))
            {
                /* concatenate styles */
                AppendToStyleAttr( doc, first, second->value );

                temp = second->next;
                TY_(ReportAttrError)( doc, node, second, JOINING_ATTRIBUTE);
                TY_(RemoveAttribute)( doc, node, second );
                second = temp;
            }
            else if ( cfg(doc, TidyDuplicateAttrs) == TidyKeepLast )
            {
                /* drop the earlier occurrence; `first` becomes its old
                   successor and the inner scan carries on after `second` */
                temp = first->next;
                TY_(ReportAttrError)( doc, node, first, REPEATED_ATTRIBUTE);
                TY_(RemoveAttribute)( doc, node, first );
                firstRedefined = yes;
                first = temp;
                second = second->next;
            }
            else /* TidyDuplicateAttrs == TidyKeepFirst */
            {
                /* drop the later occurrence */
                temp = second->next;
                TY_(ReportAttrError)( doc, node, second, REPEATED_ATTRIBUTE);
                TY_(RemoveAttribute)( doc, node, second );
                second = temp;
            }
        }
        /* when `first` was replaced above it already points at the next
           attribute to examine, so it must not be advanced again */
        if (!firstRedefined)
            first = first->next;
    }
}
1461 | | |
1462 | | /* ignore unknown attributes for proprietary elements */ |
1463 | | const Attribute* TY_(CheckAttribute)( TidyDocImpl* doc, Node *node, AttVal *attval ) |
1464 | 34.1k | { |
1465 | 34.1k | const Attribute* attribute = attval->dict; |
1466 | | |
1467 | 34.1k | if ( attribute != NULL ) |
1468 | 6.22k | { |
1469 | 6.22k | if (attrIsXML_LANG(attval) || attrIsXML_SPACE(attval)) |
1470 | 10 | { |
1471 | 10 | doc->lexer->isvoyager = yes; |
1472 | 10 | if (!cfgBool(doc, TidyHtmlOut)) |
1473 | 10 | { |
1474 | 10 | TY_(SetOptionBool)(doc, TidyXhtmlOut, yes); |
1475 | 10 | TY_(SetOptionBool)(doc, TidyXmlOut, yes); |
1476 | 10 | } |
1477 | 10 | } |
1478 | | |
1479 | 6.22k | TY_(ConstrainVersion)(doc, AttributeVersions(node, attval)); |
1480 | | |
1481 | 6.22k | if (attribute->attrchk) |
1482 | 3.79k | attribute->attrchk( doc, node, attval ); |
1483 | 6.22k | } |
1484 | | |
1485 | 34.1k | return attribute; |
1486 | 34.1k | } |
1487 | | |
1488 | | Bool TY_(IsBoolAttribute)(AttVal *attval) |
1489 | 12.8k | { |
1490 | 12.8k | const Attribute *attribute = ( attval ? attval->dict : NULL ); |
1491 | 12.8k | if ( attribute && attribute->attrchk == CH_BOOL ) |
1492 | 0 | return yes; |
1493 | 12.8k | return no; |
1494 | 12.8k | } |
1495 | | |
1496 | | Bool TY_(attrIsEvent)( AttVal* attval ) |
1497 | 12.8k | { |
1498 | 12.8k | TidyAttrId atid = AttrId( attval ); |
1499 | | |
1500 | 12.8k | return (atid == TidyAttr_OnAFTERUPDATE || |
1501 | 12.8k | atid == TidyAttr_OnBEFOREUNLOAD || |
1502 | 12.8k | atid == TidyAttr_OnBEFOREUPDATE || |
1503 | 12.8k | atid == TidyAttr_OnBLUR || |
1504 | 12.8k | atid == TidyAttr_OnCHANGE || |
1505 | 12.8k | atid == TidyAttr_OnCLICK || |
1506 | 12.8k | atid == TidyAttr_OnDATAAVAILABLE || |
1507 | 12.8k | atid == TidyAttr_OnDATASETCHANGED || |
1508 | 12.8k | atid == TidyAttr_OnDATASETCOMPLETE || |
1509 | 12.8k | atid == TidyAttr_OnDBLCLICK || |
1510 | 12.8k | atid == TidyAttr_OnERRORUPDATE || |
1511 | 12.8k | atid == TidyAttr_OnFOCUS || |
1512 | 12.8k | atid == TidyAttr_OnKEYDOWN || |
1513 | 12.8k | atid == TidyAttr_OnKEYPRESS || |
1514 | 12.8k | atid == TidyAttr_OnKEYUP || |
1515 | 12.8k | atid == TidyAttr_OnLOAD || |
1516 | 12.8k | atid == TidyAttr_OnMOUSEDOWN || |
1517 | 12.8k | atid == TidyAttr_OnMOUSEMOVE || |
1518 | 12.8k | atid == TidyAttr_OnMOUSEOUT || |
1519 | 12.8k | atid == TidyAttr_OnMOUSEOVER || |
1520 | 12.8k | atid == TidyAttr_OnMOUSEUP || |
1521 | 12.8k | atid == TidyAttr_OnRESET || |
1522 | 12.8k | atid == TidyAttr_OnROWENTER || |
1523 | 12.8k | atid == TidyAttr_OnROWEXIT || |
1524 | 12.8k | atid == TidyAttr_OnSELECT || |
1525 | 12.8k | atid == TidyAttr_OnSUBMIT || |
1526 | 12.8k | atid == TidyAttr_OnUNLOAD); |
1527 | 12.8k | } |
1528 | | |
1529 | | static void CheckLowerCaseAttrValue( TidyDocImpl* doc, Node *node, AttVal *attval) |
1530 | 438 | { |
1531 | 438 | tmbstr p; |
1532 | 438 | Bool hasUpper = no; |
1533 | | |
1534 | 438 | if (!AttrHasValue(attval)) |
1535 | 0 | return; |
1536 | | |
1537 | 438 | p = attval->value; |
1538 | | |
1539 | 2.12k | while (*p) |
1540 | 2.10k | { |
1541 | 2.10k | if (TY_(IsUpper)(*p)) /* #501230 - fix by Terry Teague - 09 Jan 02 */ |
1542 | 418 | { |
1543 | 418 | hasUpper = yes; |
1544 | 418 | break; |
1545 | 418 | } |
1546 | 1.68k | p++; |
1547 | 1.68k | } |
1548 | | |
1549 | 438 | if (hasUpper) |
1550 | 418 | { |
1551 | 418 | Lexer* lexer = doc->lexer; |
1552 | 418 | if (lexer->isvoyager) |
1553 | 20 | TY_(ReportAttrError)( doc, node, attval, ATTR_VALUE_NOT_LCASE); |
1554 | | |
1555 | 418 | if ( lexer->isvoyager || cfgBool(doc, TidyLowerLiterals) ) |
1556 | 418 | attval->value = TY_(tmbstrtolower)(attval->value); |
1557 | 418 | } |
1558 | 438 | } |
1559 | | |
/* Issue #588 - use simple macros only!
   The behaviour of 'isalnum(c)' is undefined when 'c' is neither EOF
   nor in the range 0 to 0xff, and it can trigger an assert or a
   SIGSEGV in some libraries, so avoid using it.

   Every use of the argument is parenthesized so that an expression
   argument (e.g. 'c & 0xff') expands with the intended precedence.
   The argument may be evaluated more than once, so it must not have
   side effects.
*/
#define ISUPPER(a) (((a) >= 'A') && ((a) <= 'Z'))
#define ISLOWER(a) (((a) >= 'a') && ((a) <= 'z'))
#define ISNUMERIC(a) (((a) >= '0') && ((a) <= '9'))
#define ISALNUM(a) (ISUPPER(a) || ISLOWER(a) || ISNUMERIC(a))
1570 | | |
1571 | | static Bool IsURLCodePoint( ctmbstr p, uint *increment ) |
1572 | 12.8k | { |
1573 | 12.8k | uint c; |
1574 | 12.8k | *increment = TY_(GetUTF8)( p, &c ) + 1; |
1575 | | |
1576 | 12.8k | return ISALNUM( c ) || |
1577 | 12.8k | c == '%' || /* not a valid codepoint, but an escape sequence */ |
1578 | 12.8k | c == '#' || /* not a valid codepoint, but a delimiter */ |
1579 | 12.8k | c == '!' || |
1580 | 12.8k | c == '$' || |
1581 | 12.8k | c == '&' || |
1582 | 12.8k | c == '\'' || |
1583 | 12.8k | c == '(' || |
1584 | 12.8k | c == ')' || |
1585 | 12.8k | c == '*' || |
1586 | 12.8k | c == '+' || |
1587 | 12.8k | c == ',' || |
1588 | 12.8k | c == '-' || |
1589 | 12.8k | c == '.' || |
1590 | 12.8k | c == '/' || |
1591 | 12.8k | c == ':' || |
1592 | 12.8k | c == ';' || |
1593 | 12.8k | c == '=' || |
1594 | 12.8k | c == '?' || |
1595 | 12.8k | c == '@' || |
1596 | 12.8k | c == '_' || |
1597 | 12.8k | c == '~' || |
1598 | 12.8k | (c >= 0x00A0 && c <= 0xD7FF) || |
1599 | 12.8k | (c >= 0xE000 && c <= 0xFDCF) || |
1600 | 12.8k | (c >= 0xFDF0 && c <= 0xFFEF) || |
1601 | 12.8k | (c >= 0x10000 && c <= 0x1FFFD) || |
1602 | 12.8k | (c >= 0x20000 && c <= 0x2FFFD) || |
1603 | 12.8k | (c >= 0x30000 && c <= 0x3FFFD) || |
1604 | 12.8k | (c >= 0x40000 && c <= 0x4FFFD) || |
1605 | 12.8k | (c >= 0x50000 && c <= 0x5FFFD) || |
1606 | 12.8k | (c >= 0x60000 && c <= 0x6FFFD) || |
1607 | 12.8k | (c >= 0x70000 && c <= 0x7FFFD) || |
1608 | 12.8k | (c >= 0x80000 && c <= 0x8FFFD) || |
1609 | 12.8k | (c >= 0x90000 && c <= 0x9FFFD) || |
1610 | 12.8k | (c >= 0xA0000 && c <= 0xAFFFD) || |
1611 | 12.8k | (c >= 0xB0000 && c <= 0xBFFFD) || |
1612 | 12.8k | (c >= 0xC0000 && c <= 0xCFFFD) || |
1613 | 12.8k | (c >= 0xD0000 && c <= 0xDFFFD) || |
1614 | 12.8k | (c >= 0xE0000 && c <= 0xEFFFD) || |
1615 | 12.8k | (c >= 0xF0000 && c <= 0xFFFFD) || |
1616 | 12.8k | (c >= 0x100000 && c <= 0x10FFFD); |
1617 | 12.8k | } |
1618 | | |
1619 | | void TY_(CheckUrl)( TidyDocImpl* doc, Node *node, AttVal *attval) |
1620 | 107 | { |
1621 | 107 | tmbchar c; |
1622 | 107 | tmbstr dest, p; |
1623 | 107 | uint escape_count = 0, backslash_count = 0, bad_codepoint_count = 0; |
1624 | 107 | uint i, pos = 0; |
1625 | 107 | uint len; |
1626 | 107 | uint increment; |
1627 | 107 | Bool isJavascript = no; |
1628 | | |
1629 | 107 | if (!AttrHasValue(attval)) |
1630 | 0 | { |
1631 | 0 | TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE); |
1632 | 0 | return; |
1633 | 0 | } |
1634 | | |
1635 | 107 | p = attval->value; |
1636 | | |
1637 | 107 | isJavascript = |
1638 | 107 | TY_(tmbstrncmp)(p,"javascript:",sizeof("javascript:")-1)==0; |
1639 | | |
1640 | 19.9k | for (i = 0; '\0' != (c = p[i]); ++i) |
1641 | 19.8k | { |
1642 | 19.8k | if (c == '\\') |
1643 | 0 | { |
1644 | 0 | ++backslash_count; |
1645 | 0 | if ( cfgBool(doc, TidyFixBackslash) && !isJavascript) |
1646 | 0 | p[i] = '/'; |
1647 | 0 | } |
1648 | 19.8k | else if ((c > 0x7e) || (c <= 0x20) || (strchr("<>", c))) |
1649 | 14.6k | ++escape_count; |
1650 | 19.8k | } |
1651 | | |
1652 | 12.9k | while ( *p != 0 ) |
1653 | 12.8k | { |
1654 | 12.8k | if ( !IsURLCodePoint( p, &increment ) ) |
1655 | 7.39k | ++bad_codepoint_count; |
1656 | 12.8k | p = p + increment; |
1657 | 12.8k | } |
1658 | 107 | p = attval->value; |
1659 | | |
1660 | 107 | if ( cfgBool(doc, TidyFixUri) && escape_count ) |
1661 | 100 | { |
1662 | 100 | Bool hadnonspace = no; |
1663 | 100 | len = TY_(tmbstrlen)(p) + escape_count * 2 + 1; |
1664 | 100 | dest = (tmbstr) TidyDocAlloc(doc, len); |
1665 | | |
1666 | 19.6k | for (i = 0; 0 != (c = p[i]); ++i) |
1667 | 19.5k | { |
1668 | 19.5k | if ((c > 0x7e) || (c <= 0x20) || (strchr("<>", c))) |
1669 | 14.6k | { |
1670 | 14.6k | if (c == 0x20) |
1671 | 1.31k | { |
1672 | | /* #345 - special case for leading spaces - discard */ |
1673 | 1.31k | if (hadnonspace) |
1674 | 1.23k | pos += sprintf( dest + pos, "%%%02X", (byte)c ); |
1675 | 1.31k | } |
1676 | 13.3k | else |
1677 | 13.3k | { |
1678 | 13.3k | pos += sprintf( dest + pos, "%%%02X", (byte)c ); |
1679 | 13.3k | hadnonspace = yes; |
1680 | 13.3k | } |
1681 | 14.6k | } |
1682 | 4.89k | else |
1683 | 4.89k | { |
1684 | 4.89k | hadnonspace = yes; |
1685 | 4.89k | dest[pos++] = c; |
1686 | 4.89k | } |
1687 | 19.5k | } |
1688 | 100 | dest[pos] = 0; |
1689 | | |
1690 | 100 | TidyDocFree(doc, attval->value); |
1691 | 100 | attval->value = dest; |
1692 | 100 | } |
1693 | 107 | if ( backslash_count ) |
1694 | 0 | { |
1695 | 0 | if ( cfgBool(doc, TidyFixBackslash) && !isJavascript ) |
1696 | 0 | TY_(ReportAttrError)( doc, node, attval, FIXED_BACKSLASH ); |
1697 | 0 | else |
1698 | 0 | TY_(ReportAttrError)( doc, node, attval, BACKSLASH_IN_URI ); |
1699 | 0 | } |
1700 | 107 | if ( escape_count ) |
1701 | 100 | { |
1702 | 100 | if ( cfgBool(doc, TidyFixUri) ) |
1703 | 100 | TY_(ReportAttrError)( doc, node, attval, ESCAPED_ILLEGAL_URI); |
1704 | 0 | else if ( !(TY_(HTMLVersion)(doc) & VERS_HTML5) ) |
1705 | 0 | TY_(ReportAttrError)( doc, node, attval, ILLEGAL_URI_REFERENCE); |
1706 | | |
1707 | 100 | doc->badChars |= BC_INVALID_URI; |
1708 | 100 | } |
1709 | 107 | if ( bad_codepoint_count ) |
1710 | 100 | { |
1711 | 100 | TY_(ReportAttrError)( doc, node, attval, ILLEGAL_URI_CODEPOINT ); |
1712 | 100 | } |
1713 | 107 | } |
1714 | | |
1715 | | /* RFC 2396, section 4.2 states: |
1716 | | "[...] in the case of HTML's FORM element, [...] an |
1717 | | empty URI reference represents the base URI of the |
1718 | | current document and should be replaced by that URI |
1719 | | when transformed into a request." |
1720 | | */ |
1721 | | void CheckAction( TidyDocImpl* doc, Node *node, AttVal *attval) |
1722 | 0 | { |
1723 | 0 | if (AttrHasValue(attval)) |
1724 | 0 | TY_(CheckUrl)( doc, node, attval ); |
1725 | 0 | } |
1726 | | |
1727 | | void CheckScript( TidyDocImpl* ARG_UNUSED(doc), Node* ARG_UNUSED(node), |
1728 | | AttVal* ARG_UNUSED(attval)) |
1729 | 2 | { |
1730 | 2 | } |
1731 | | |
1732 | | Bool TY_(IsValidHTMLID)(ctmbstr id) |
1733 | 1.21k | { |
1734 | 1.21k | ctmbstr s = id; |
1735 | | |
1736 | 1.21k | if (!s) |
1737 | 0 | return no; |
1738 | | |
1739 | 38.8k | while (*s) |
1740 | 37.6k | if (TY_(IsHTMLSpace)(*s++)) |
1741 | 5 | return no; |
1742 | | |
1743 | 1.20k | return yes; |
1744 | | |
1745 | 1.21k | } |
1746 | | |
1747 | | Bool TY_(IsValidXMLID)(ctmbstr id) |
1748 | 1.43k | { |
1749 | 1.43k | ctmbstr s = id; |
1750 | 1.43k | tchar c; |
1751 | | |
1752 | 1.43k | if (!s) |
1753 | 0 | return no; |
1754 | | |
1755 | 1.43k | c = *s++; |
1756 | 1.43k | if (c > 0x7F) |
1757 | 45 | s += TY_(GetUTF8)(s, &c); |
1758 | | |
1759 | 1.43k | if (!(TY_(IsXMLLetter)(c) || c == '_' || c == ':')) |
1760 | 98 | return no; |
1761 | | |
1762 | 3.81k | while (*s) |
1763 | 3.19k | { |
1764 | 3.19k | c = (unsigned char)*s; |
1765 | | |
1766 | 3.19k | if (c > 0x7F) |
1767 | 1.60k | s += TY_(GetUTF8)(s, &c); |
1768 | | |
1769 | 3.19k | ++s; |
1770 | | |
1771 | 3.19k | if (!TY_(IsXMLNamechar)(c)) |
1772 | 717 | return no; |
1773 | 3.19k | } |
1774 | | |
1775 | 617 | return yes; |
1776 | 1.33k | } |
1777 | | |
1778 | | static Bool IsValidNMTOKEN(ctmbstr name) |
1779 | 32 | { |
1780 | 32 | ctmbstr s = name; |
1781 | 32 | tchar c; |
1782 | | |
1783 | 32 | if (!s) |
1784 | 0 | return no; |
1785 | | |
1786 | 274 | while (*s) |
1787 | 254 | { |
1788 | 254 | c = (unsigned char)*s; |
1789 | | |
1790 | 254 | if (c > 0x7F) |
1791 | 14 | s += TY_(GetUTF8)(s, &c); |
1792 | | |
1793 | 254 | ++s; |
1794 | | |
1795 | 254 | if (!TY_(IsXMLNamechar)(c)) |
1796 | 12 | return no; |
1797 | 254 | } |
1798 | | |
1799 | 20 | return yes; |
1800 | 32 | } |
1801 | | |
1802 | | static Bool AttrValueIsAmong(AttVal *attval, ctmbstr const list[]) |
1803 | 873 | { |
1804 | 873 | const ctmbstr *v; |
1805 | 2.63k | for (v = list; *v; ++v) |
1806 | 2.18k | if (AttrValueIs(attval, *v)) |
1807 | 420 | return yes; |
1808 | 453 | return no; |
1809 | 873 | } |
1810 | | |
1811 | | static void CheckAttrValidity( TidyDocImpl* doc, Node *node, AttVal *attval, |
1812 | | ctmbstr const list[]) |
1813 | 428 | { |
1814 | 428 | if (!AttrHasValue(attval)) |
1815 | 0 | { |
1816 | 0 | TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE); |
1817 | 0 | return; |
1818 | 0 | } |
1819 | | |
1820 | 428 | CheckLowerCaseAttrValue( doc, node, attval ); |
1821 | | |
1822 | 428 | if (!AttrValueIsAmong(attval, list)) |
1823 | 428 | TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE); |
1824 | 428 | } |
1825 | | |
1826 | | void CheckName( TidyDocImpl* doc, Node *node, AttVal *attval) |
1827 | 58 | { |
1828 | 58 | Node *old; |
1829 | | |
1830 | 58 | if (!AttrHasValue(attval)) |
1831 | 0 | { |
1832 | 0 | TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE); |
1833 | 0 | return; |
1834 | 0 | } |
1835 | | |
1836 | 58 | if ( TY_(IsAnchorElement)(doc, node) ) |
1837 | 32 | { |
1838 | 32 | if (cfgBool(doc, TidyXmlOut) && !IsValidNMTOKEN(attval->value)) |
1839 | 12 | TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE); |
1840 | | |
1841 | 32 | if ((old = GetNodeByAnchor(doc, attval->value)) && old != node) |
1842 | 6 | { |
1843 | 6 | if (node->implicit) /* Is #709 - improve warning text */ |
1844 | 2 | TY_(ReportAttrError)(doc, node, attval, ANCHOR_DUPLICATED); |
1845 | 4 | else |
1846 | 4 | TY_(ReportAttrError)( doc, node, attval, ANCHOR_NOT_UNIQUE); |
1847 | 6 | } |
1848 | 26 | else |
1849 | 26 | AddAnchor( doc, attval->value, node ); |
1850 | 32 | } |
1851 | 58 | } |
1852 | | |
1853 | | void CheckId( TidyDocImpl* doc, Node *node, AttVal *attval ) |
1854 | 2.15k | { |
1855 | 2.15k | Lexer* lexer = doc->lexer; |
1856 | 2.15k | Node *old; |
1857 | | |
1858 | 2.15k | if (!AttrHasValue(attval)) |
1859 | 960 | { |
1860 | 960 | TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE); |
1861 | 960 | return; |
1862 | 960 | } |
1863 | | |
1864 | 1.19k | if (!TY_(IsValidHTMLID)(attval->value)) |
1865 | 3 | { |
1866 | 3 | if (lexer->isvoyager && TY_(IsValidXMLID)(attval->value)) |
1867 | 0 | TY_(ReportAttrError)( doc, node, attval, XML_ID_SYNTAX); |
1868 | 3 | else |
1869 | 3 | TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE); |
1870 | 3 | } |
1871 | | |
1872 | 1.19k | if ((old = GetNodeByAnchor(doc, attval->value)) && old != node) |
1873 | 1.17k | { |
1874 | 1.17k | if (node->implicit) /* Is #709 - improve warning text */ |
1875 | 577 | TY_(ReportAttrError)(doc, node, attval, ANCHOR_DUPLICATED); |
1876 | 596 | else |
1877 | 596 | TY_(ReportAttrError)( doc, node, attval, ANCHOR_NOT_UNIQUE); |
1878 | 1.17k | } |
1879 | 24 | else |
1880 | 24 | AddAnchor( doc, attval->value, node ); |
1881 | 1.19k | } |
1882 | | |
1883 | | void CheckIs( TidyDocImpl* doc, Node *node, AttVal *attval ) |
1884 | 177 | { |
1885 | 177 | const char *ptr; |
1886 | 177 | Bool go = yes; |
1887 | | |
1888 | | /* `is` MUST NOT be in an autonomous custom tag */ |
1889 | 177 | ptr = strchr(node->element, '-'); |
1890 | 177 | if ( ( ptr && (ptr - node->element > 0) ) ) |
1891 | 0 | { |
1892 | 0 | TY_(ReportAttrError)( doc, node, attval, ATTRIBUTE_IS_NOT_ALLOWED); |
1893 | 0 | } |
1894 | | |
1895 | | /* Even if we fail the above test, we'll continue to emit reports because |
1896 | | the user should *also* know that his attribute values are wrong, even |
1897 | | if they shouldn't be in custom tags anyway. */ |
1898 | | |
1899 | | /* `is` MUST have a value */ |
1900 | 177 | if (!AttrHasValue(attval)) |
1901 | 176 | { |
1902 | 176 | TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE); |
1903 | 176 | return; |
1904 | 176 | } |
1905 | | |
1906 | | /* `is` MUST contain a hyphen and no space. */ |
1907 | 1 | ptr = strchr(attval->value, '-'); |
1908 | 1 | go = ( ptr && (ptr - attval->value > 0) ); |
1909 | 1 | ptr = strchr(attval->value, ' '); |
1910 | 1 | go = go & (ptr == NULL); |
1911 | 1 | if ( !go ) |
1912 | 1 | { |
1913 | 1 | TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE); |
1914 | 1 | } |
1915 | 1 | } |
1916 | | |
1917 | | void CheckBool( TidyDocImpl* doc, Node *node, AttVal *attval) |
1918 | 8 | { |
1919 | 8 | if (!AttrHasValue(attval)) |
1920 | 0 | return; |
1921 | | |
1922 | 8 | CheckLowerCaseAttrValue( doc, node, attval ); |
1923 | 8 | } |
1924 | | |
1925 | | void CheckAlign( TidyDocImpl* doc, Node *node, AttVal *attval) |
1926 | 2 | { |
1927 | 2 | ctmbstr const values[] = {"left", "right", "center", "justify", NULL}; |
1928 | | |
1929 | | /* IMG, OBJECT, APPLET and EMBED use align for vertical position */ |
1930 | 2 | if (node->tag && (node->tag->model & CM_IMG)) |
1931 | 0 | { |
1932 | 0 | CheckValign( doc, node, attval ); |
1933 | 0 | return; |
1934 | 0 | } |
1935 | | |
1936 | 2 | if (!AttrHasValue(attval)) |
1937 | 0 | { |
1938 | 0 | TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE); |
1939 | 0 | return; |
1940 | 0 | } |
1941 | | |
1942 | 2 | CheckLowerCaseAttrValue( doc, node, attval); |
1943 | | |
1944 | | /* currently CheckCaption(...) takes care of the remaining cases */ |
1945 | 2 | if (nodeIsCAPTION(node)) |
1946 | 0 | return; |
1947 | | |
1948 | 2 | if (!AttrValueIsAmong(attval, values)) |
1949 | 0 | { |
1950 | | /* align="char" is allowed for elements with CM_TABLE|CM_ROW |
1951 | | except CAPTION which is excluded above, */ |
1952 | 0 | if( !(AttrValueIs(attval, "char") |
1953 | 0 | && TY_(nodeHasCM)(node, CM_TABLE|CM_ROW)) ) |
1954 | 0 | TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE); |
1955 | 0 | } |
1956 | 2 | } |
1957 | | |
1958 | | void CheckValign( TidyDocImpl* doc, Node *node, AttVal *attval) |
1959 | 0 | { |
1960 | 0 | ctmbstr const values[] = {"top", "middle", "bottom", "baseline", NULL}; |
1961 | 0 | ctmbstr const values2[] = {"left", "right", NULL}; |
1962 | 0 | ctmbstr const valuesp[] = {"texttop", "absmiddle", "absbottom", |
1963 | 0 | "textbottom", NULL}; |
1964 | |
|
1965 | 0 | if (!AttrHasValue(attval)) |
1966 | 0 | { |
1967 | 0 | TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE); |
1968 | 0 | return; |
1969 | 0 | } |
1970 | | |
1971 | 0 | CheckLowerCaseAttrValue( doc, node, attval ); |
1972 | |
|
1973 | 0 | if (AttrValueIsAmong(attval, values)) |
1974 | 0 | { |
1975 | | /* all is fine */ |
1976 | 0 | } |
1977 | 0 | else if (AttrValueIsAmong(attval, values2)) |
1978 | 0 | { |
1979 | 0 | if (!(node->tag && (node->tag->model & CM_IMG))) |
1980 | 0 | TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE); |
1981 | 0 | } |
1982 | 0 | else if (AttrValueIsAmong(attval, valuesp)) |
1983 | 0 | { |
1984 | 0 | TY_(ConstrainVersion)( doc, VERS_PROPRIETARY ); |
1985 | 0 | TY_(ReportAttrError)( doc, node, attval, PROPRIETARY_ATTR_VALUE); |
1986 | 0 | } |
1987 | 0 | else |
1988 | 0 | TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE); |
1989 | 0 | } |
1990 | | |
1991 | | void CheckLength( TidyDocImpl* doc, Node *node, AttVal *attval) |
1992 | 0 | { |
1993 | 0 | tmbstr p; |
1994 | | |
1995 | 0 | if (!AttrHasValue(attval)) |
1996 | 0 | { |
1997 | 0 | TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE); |
1998 | 0 | return; |
1999 | 0 | } |
2000 | | |
2001 | | /* don't check for <col width=...> and <colgroup width=...> */ |
2002 | 0 | if (attrIsWIDTH(attval) && (nodeIsCOL(node) || nodeIsCOLGROUP(node))) |
2003 | 0 | return; |
2004 | | |
2005 | 0 | p = attval->value; |
2006 | | |
2007 | 0 | if (!TY_(IsDigit)(*p++)) |
2008 | 0 | { |
2009 | 0 | TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE); |
2010 | 0 | } |
2011 | 0 | else |
2012 | 0 | { |
2013 | 0 | Bool percentFound = no; |
2014 | 0 | while (*p) |
2015 | 0 | { |
2016 | 0 | if (!percentFound && *p == '%') |
2017 | 0 | { |
2018 | 0 | percentFound = yes; |
2019 | 0 | } |
2020 | 0 | else if (percentFound || !TY_(IsDigit)(*p)) |
2021 | 0 | { |
2022 | 0 | TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE); |
2023 | 0 | break; |
2024 | 0 | } |
2025 | | |
2026 | 0 | ++p; |
2027 | 0 | } |
2028 | 0 | } |
2029 | 0 | } |
2030 | | |
2031 | | void CheckTarget( TidyDocImpl* doc, Node *node, AttVal *attval) |
2032 | 420 | { |
2033 | 420 | ctmbstr const values[] = {"_blank", "_self", "_parent", "_top", NULL}; |
2034 | | |
2035 | 420 | if (!AttrHasValue(attval)) |
2036 | 0 | { |
2037 | 0 | TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE); |
2038 | 0 | return; |
2039 | 0 | } |
2040 | | |
2041 | | /* target names must begin with A-Za-z ... */ |
2042 | 420 | if (TY_(IsLetter)(attval->value[0])) |
2043 | 0 | return; |
2044 | | |
2045 | | /* or be one of the allowed list */ |
2046 | 420 | if (!AttrValueIsAmong(attval, values)) |
2047 | 2 | TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE); |
2048 | 420 | } |
2049 | | |
2050 | | void CheckFsubmit( TidyDocImpl* doc, Node *node, AttVal *attval) |
2051 | 0 | { |
2052 | 0 | ctmbstr const values[] = {"get", "post", NULL}; |
2053 | 0 | CheckAttrValidity( doc, node, attval, values ); |
2054 | 0 | } |
2055 | | |
2056 | | void CheckClear( TidyDocImpl* doc, Node *node, AttVal *attval) |
2057 | 0 | { |
2058 | 0 | ctmbstr const values[] = {"none", "left", "right", "all", NULL}; |
2059 | |
|
2060 | 0 | if (!AttrHasValue(attval)) |
2061 | 0 | { |
2062 | 0 | TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE); |
2063 | 0 | if (attval->value == NULL) |
2064 | 0 | attval->value = TY_(tmbstrdup)( doc->allocator, "none" ); |
2065 | 0 | return; |
2066 | 0 | } |
2067 | | |
2068 | 0 | CheckLowerCaseAttrValue( doc, node, attval ); |
2069 | | |
2070 | 0 | if (!AttrValueIsAmong(attval, values)) |
2071 | 0 | TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE); |
2072 | 0 | } |
2073 | | |
2074 | | void CheckShape( TidyDocImpl* doc, Node *node, AttVal *attval) |
2075 | 0 | { |
2076 | 0 | ctmbstr const values[] = {"rect", "default", "circle", "poly", NULL}; |
2077 | 0 | CheckAttrValidity( doc, node, attval, values ); |
2078 | 0 | } |
2079 | | |
2080 | | void CheckScope( TidyDocImpl* doc, Node *node, AttVal *attval) |
2081 | 418 | { |
2082 | 418 | ctmbstr const values[] = {"row", "rowgroup", "col", "colgroup", NULL}; |
2083 | 418 | CheckAttrValidity( doc, node, attval, values ); |
2084 | 418 | } |
2085 | | |
2086 | | void CheckNumber( TidyDocImpl* doc, Node *node, AttVal *attval) |
2087 | 0 | { |
2088 | 0 | tmbstr p; |
2089 | | |
2090 | 0 | if (!AttrHasValue(attval)) |
2091 | 0 | { |
2092 | 0 | TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE); |
2093 | 0 | return; |
2094 | 0 | } |
2095 | | |
2096 | | /* don't check <frameset cols=... rows=...> */ |
2097 | 0 | if ( nodeIsFRAMESET(node) && |
2098 | 0 | (attrIsCOLS(attval) || attrIsROWS(attval))) |
2099 | 0 | return; |
2100 | | |
2101 | 0 | p = attval->value; |
2102 | | |
2103 | | /* font size may be preceded by + or - */ |
2104 | 0 | if ( nodeIsFONT(node) && (*p == '+' || *p == '-') ) |
2105 | 0 | ++p; |
2106 | | /* tabindex may be preceded by - */ |
2107 | 0 | if (attval->attribute && (strcmp(attval->attribute,"tabindex") == 0) && (*p == '-')) |
2108 | 0 | ++p; |
2109 | |
|
2110 | 0 | while (*p) |
2111 | 0 | { |
2112 | 0 | if (!TY_(IsDigit)(*p)) |
2113 | 0 | { |
2114 | 0 | TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE); |
2115 | 0 | break; |
2116 | 0 | } |
2117 | 0 | ++p; |
2118 | 0 | } |
2119 | 0 | } |
2120 | | |
2121 | | /* check hexadecimal color value */ |
2122 | | static Bool IsValidColorCode(ctmbstr color) |
2123 | 9 | { |
2124 | 9 | uint i; |
2125 | | |
2126 | 9 | if (TY_(tmbstrlen)(color) != 6) |
2127 | 9 | return no; |
2128 | | |
2129 | | /* check if valid hex digits and letters */ |
2130 | 0 | for (i = 0; i < 6; i++) |
2131 | 0 | if (!TY_(IsDigit)(color[i]) && !strchr("abcdef", TY_(ToLower)(color[i]))) |
2132 | 0 | return no; |
2133 | | |
2134 | 0 | return yes; |
2135 | 0 | } |
2136 | | |
2137 | | /* check color syntax and beautify value by option */ |
2138 | | void CheckColor( TidyDocImpl* doc, Node *node, AttVal *attval) |
2139 | 461 | { |
2140 | 461 | Bool valid = no; |
2141 | 461 | tmbstr given; |
2142 | | |
2143 | 461 | if (!AttrHasValue(attval)) |
2144 | 452 | { |
2145 | 452 | TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE); |
2146 | 452 | return; |
2147 | 452 | } |
2148 | | |
2149 | 9 | given = attval->value; |
2150 | | |
2151 | | /* 727851 - add hash to hash-less color values */ |
2152 | 9 | if (given[0] != '#' && (valid = IsValidColorCode(given))) |
2153 | 0 | { |
2154 | 0 | tmbstr cp, s; |
2155 | |
|
2156 | 0 | cp = s = (tmbstr) TidyDocAlloc(doc, 2 + TY_(tmbstrlen)(given)); |
2157 | 0 | *cp++ = '#'; |
2158 | 0 | while ('\0' != (*cp++ = *given++)) |
2159 | 0 | continue; |
2160 | |
|
2161 | 0 | TY_(ReportAttrError)(doc, node, attval, BAD_ATTRIBUTE_VALUE_REPLACED); |
2162 | |
|
2163 | 0 | TidyDocFree(doc, attval->value); |
2164 | 0 | given = attval->value = s; |
2165 | 0 | } |
2166 | | |
2167 | 9 | if (!valid && given[0] == '#') |
2168 | 0 | valid = IsValidColorCode(given + 1); |
2169 | | |
2170 | 9 | if (valid && given[0] == '#' && cfgBool(doc, TidyReplaceColor)) |
2171 | 0 | { |
2172 | 0 | ctmbstr newName = GetColorName(given, TY_(IsHTML5Mode)(doc)); |
2173 | |
|
2174 | 0 | if (newName) |
2175 | 0 | { |
2176 | 0 | TidyDocFree(doc, attval->value); |
2177 | 0 | given = attval->value = TY_(tmbstrdup)(doc->allocator, newName); |
2178 | 0 | } |
2179 | 0 | } |
2180 | | |
2181 | | /* if it is not a valid color code, it is a color name */ |
2182 | 9 | if (!valid) |
2183 | 9 | valid = GetColorCode(given, TY_(IsHTML5Mode)(doc)) != NULL; |
2184 | | |
2185 | 9 | if (valid && given[0] == '#') |
2186 | 0 | attval->value = TY_(tmbstrtoupper)(attval->value); |
2187 | 9 | else if (valid) |
2188 | 0 | attval->value = TY_(tmbstrtolower)(attval->value); |
2189 | | |
2190 | 9 | if (!valid) |
2191 | 9 | TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE); |
2192 | 9 | } |
2193 | | |
2194 | | /* check valuetype attribute for element param */ |
2195 | | void CheckVType( TidyDocImpl* doc, Node *node, AttVal *attval) |
2196 | 0 | { |
2197 | 0 | ctmbstr const values[] = {"data", "object", "ref", NULL}; |
2198 | 0 | CheckAttrValidity( doc, node, attval, values ); |
2199 | 0 | } |
2200 | | |
2201 | | /* checks scrolling attribute */ |
2202 | | void CheckScroll( TidyDocImpl* doc, Node *node, AttVal *attval) |
2203 | 0 | { |
2204 | 0 | ctmbstr const values[] = {"no", "auto", "yes", NULL}; |
2205 | 0 | CheckAttrValidity( doc, node, attval, values ); |
2206 | 0 | } |
2207 | | |
2208 | | /* checks dir attribute */ |
2209 | | void CheckTextDir( TidyDocImpl* doc, Node *node, AttVal *attval) |
2210 | 10 | { |
2211 | 10 | ctmbstr const values4[] = { "rtl", "ltr", NULL }; |
2212 | | /* PR #712 - add 'auto' for HTML5 - @doronbehar */ |
2213 | 10 | ctmbstr const values5[] = { "rtl", "ltr", "auto", NULL }; |
2214 | 10 | CheckAttrValidity(doc, node, attval, |
2215 | 10 | (TY_(IsHTML5Mode)(doc) ? values5 : values4)); |
2216 | 10 | } |
2217 | | |
2218 | | /* checks lang and xml:lang attributes */ |
2219 | | void CheckLang( TidyDocImpl* doc, Node *node, AttVal *attval) |
2220 | 10 | { |
2221 | | /* empty xml:lang is allowed through XML 1.0 SE errata */ |
2222 | 10 | if (!AttrHasValue(attval) && !attrIsXML_LANG(attval)) |
2223 | 0 | { |
2224 | 0 | if ( cfg(doc, TidyAccessibilityCheckLevel) == 0 ) |
2225 | 0 | { |
2226 | 0 | TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE ); |
2227 | 0 | } |
2228 | 0 | return; |
2229 | 0 | } |
2230 | 10 | } |
2231 | | |
2232 | | /* checks loading attribute */ |
2233 | | void CheckLoading( TidyDocImpl* doc, Node *node, AttVal *attval) |
2234 | 0 | { |
2235 | 0 | ctmbstr const values[] = {"lazy", "eager", NULL}; |
2236 | 0 | CheckAttrValidity( doc, node, attval, values ); |
2237 | 0 | } |
2238 | | |
2239 | | /* checks type attribute */ |
2240 | | void CheckType( TidyDocImpl* doc, Node *node, AttVal *attval) |
2241 | 32 | { |
2242 | 32 | ctmbstr const valuesINPUT[] = { |
2243 | 32 | "text", "password", "checkbox", "radio", "submit", "reset", "file", |
2244 | 32 | "hidden", "image", "button", "color", "date", "datetime", |
2245 | 32 | "datetime-local", "email", "month", "number", "range", "search", |
2246 | 32 | "tel", "time", "url", "week", NULL}; |
2247 | 32 | ctmbstr const valuesBUTTON[] = {"button", "submit", "reset", NULL}; |
2248 | 32 | ctmbstr const valuesUL[] = {"disc", "square", "circle", NULL}; |
2249 | 32 | ctmbstr const valuesOL[] = {"1", "a", "i", NULL}; |
2250 | | |
2251 | 32 | if (nodeIsINPUT(node)) |
2252 | 0 | CheckAttrValidity( doc, node, attval, valuesINPUT ); |
2253 | 32 | else if (nodeIsBUTTON(node)) |
2254 | 0 | CheckAttrValidity( doc, node, attval, valuesBUTTON ); |
2255 | 32 | else if (nodeIsUL(node)) |
2256 | 0 | CheckAttrValidity( doc, node, attval, valuesUL ); |
2257 | 32 | else if (nodeIsOL(node)) |
2258 | 30 | { |
2259 | 30 | if (!AttrHasValue(attval)) |
2260 | 30 | { |
2261 | 30 | TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE); |
2262 | 30 | return; |
2263 | 30 | } |
2264 | 0 | if (!AttrValueIsAmong(attval, valuesOL)) |
2265 | 0 | TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE); |
2266 | 0 | } |
2267 | 2 | else if (nodeIsLI(node)) |
2268 | 0 | { |
2269 | 0 | if (!AttrHasValue(attval)) |
2270 | 0 | { |
2271 | 0 | TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE); |
2272 | 0 | return; |
2273 | 0 | } |
2274 | 0 | if (AttrValueIsAmong(attval, valuesUL)) |
2275 | 0 | CheckLowerCaseAttrValue( doc, node, attval ); |
2276 | 0 | else if (!AttrValueIsAmong(attval, valuesOL)) |
2277 | 0 | TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE); |
2278 | 0 | } |
2279 | 2 | return; |
2280 | 32 | } |
2281 | | |
2282 | | static void CheckDecimal( TidyDocImpl* doc, Node *node, AttVal *attval) |
2283 | 0 | { |
2284 | 0 | tmbstr p; |
2285 | 0 | Bool hasPoint = no; |
2286 | |
|
2287 | 0 | p = attval->value; |
2288 | | |
2289 | | /* Allow leading sign */ |
2290 | 0 | if (*p == '+' || *p == '-') |
2291 | 0 | ++p; |
2292 | |
|
2293 | 0 | while (*p) |
2294 | 0 | { |
2295 | | /* Allow a single decimal point */ |
2296 | 0 | if (*p == '.') |
2297 | 0 | { |
2298 | 0 | if (!hasPoint) |
2299 | 0 | hasPoint = yes; |
2300 | 0 | else |
2301 | 0 | TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE); |
2302 | 0 | break; |
2303 | 0 | } |
2304 | | |
2305 | 0 | if (!TY_(IsDigit)(*p)) |
2306 | 0 | { |
2307 | 0 | TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE); |
2308 | 0 | break; |
2309 | 0 | } |
2310 | 0 | ++p; |
2311 | 0 | } |
2312 | 0 | } |
2313 | | |
2314 | | static Bool IsSvgPaintAttr(AttVal *attval) |
2315 | 23 | { |
2316 | 23 | return attrIsCOLOR(attval) |
2317 | 23 | || attrIsSVG_FILL(attval) |
2318 | 23 | || attrIsSVG_FILLRULE(attval) |
2319 | 23 | || attrIsSVG_STROKE(attval) |
2320 | 23 | || attrIsSVG_STROKEDASHARRAY(attval) |
2321 | 23 | || attrIsSVG_STROKEDASHOFFSET(attval) |
2322 | 23 | || attrIsSVG_STROKELINECAP(attval) |
2323 | 23 | || attrIsSVG_STROKELINEJOIN(attval) |
2324 | 23 | || attrIsSVG_STROKEMITERLIMIT(attval) |
2325 | 23 | || attrIsSVG_STROKEWIDTH(attval) |
2326 | 23 | || attrIsSVG_COLORINTERPOLATION(attval) |
2327 | 23 | || attrIsSVG_COLORRENDERING(attval) |
2328 | 23 | || attrIsSVG_OPACITY(attval) |
2329 | 23 | || attrIsSVG_STROKEOPACITY(attval) |
2330 | 23 | || attrIsSVG_FILLOPACITY(attval); |
2331 | 23 | } |
2332 | | |
2333 | | /* Check SVG attributes */ |
2334 | | static void CheckSvgAttr( TidyDocImpl* doc, Node *node, AttVal *attval) |
2335 | 23 | { |
2336 | 23 | if (!nodeIsSVG(node)) |
2337 | 0 | { |
2338 | 0 | TY_(ReportAttrError)(doc, node, attval, ATTRIBUTE_IS_NOT_ALLOWED); |
2339 | 0 | return; |
2340 | 0 | } |
2341 | | |
2342 | | /* Issue #903 - check SVG paint attributes */ |
2343 | 23 | if (IsSvgPaintAttr(attval)) |
2344 | 23 | { |
2345 | | /* all valid paint attributes have values */ |
2346 | 23 | if (!AttrHasValue(attval)) |
2347 | 0 | { |
2348 | 0 | TY_(ReportAttrError)(doc, node, attval, MISSING_ATTR_VALUE); |
2349 | 0 | return; |
2350 | 0 | } |
2351 | | /* all paint attributes support an 'inherit' value, |
2352 | | per https://dev.w3.org/SVG/profiles/1.1F2/publish/painting.html#SpecifyingPaint */ |
2353 | 23 | if (AttrValueIs(attval, "inherit")) |
2354 | 0 | { |
2355 | 0 | return; |
2356 | 0 | } |
2357 | | |
2358 | | /* check paint datatypes |
2359 | | see https://dev.w3.org/SVG/profiles/1.1F2/publish/painting.html#SpecifyingPaint |
2360 | | */ |
2361 | 23 | if (attrIsSVG_FILL(attval) || attrIsSVG_STROKE(attval)) |
2362 | 9 | { |
2363 | | /* TODO: support funciri */ |
2364 | 9 | static ctmbstr const values[] = { |
2365 | 9 | "none", "currentColor", NULL}; |
2366 | | |
2367 | 9 | if (AttrValueIsAmong(attval, values)) |
2368 | 0 | CheckLowerCaseAttrValue(doc, node, attval); |
2369 | 9 | else |
2370 | 9 | CheckColor(doc, node, attval); |
2371 | 9 | } |
2372 | 14 | else if (attrIsSVG_FILLRULE(attval)) |
2373 | 0 | { |
2374 | 0 | static ctmbstr const values[] = {"nonzero", "evenodd", NULL}; |
2375 | |
|
2376 | 0 | if (AttrValueIsAmong(attval, values)) |
2377 | 0 | CheckLowerCaseAttrValue(doc, node, attval); |
2378 | 0 | else |
2379 | 0 | TY_(ReportAttrError)(doc, node, attval, BAD_ATTRIBUTE_VALUE); |
2380 | 0 | } |
2381 | 14 | else if (attrIsSVG_STROKEDASHARRAY(attval)) |
2382 | 0 | { |
2383 | 0 | static ctmbstr const values[] = {"none", NULL}; |
2384 | |
|
2385 | 0 | if (AttrValueIsAmong(attval, values)) |
2386 | 0 | CheckLowerCaseAttrValue(doc, node, attval); |
2387 | 0 | else |
2388 | 0 | { |
2389 | | /* TODO: process dash arrays */ |
2390 | 0 | } |
2391 | 0 | } |
2392 | 14 | else if (attrIsSVG_STROKEDASHOFFSET(attval)) |
2393 | 0 | { |
2394 | 0 | CheckLength(doc, node, attval); |
2395 | 0 | } |
2396 | 14 | else if (attrIsSVG_STROKELINECAP(attval)) |
2397 | 0 | { |
2398 | 0 | static ctmbstr const values[] = {"butt", "round", "square", NULL}; |
2399 | |
|
2400 | 0 | if (AttrValueIsAmong(attval, values)) |
2401 | 0 | CheckLowerCaseAttrValue(doc, node, attval); |
2402 | 0 | else |
2403 | 0 | TY_(ReportAttrError)(doc, node, attval, BAD_ATTRIBUTE_VALUE); |
2404 | 0 | } |
2405 | 14 | else if (attrIsSVG_STROKELINEJOIN(attval)) |
2406 | 14 | { |
2407 | 14 | static ctmbstr const values[] = {"miter", "round", "bevel", NULL}; |
2408 | | |
2409 | 14 | if (AttrValueIsAmong(attval, values)) |
2410 | 0 | CheckLowerCaseAttrValue(doc, node, attval); |
2411 | 14 | else |
2412 | 14 | TY_(ReportAttrError)(doc, node, attval, BAD_ATTRIBUTE_VALUE); |
2413 | 14 | } |
2414 | 0 | else if (attrIsSVG_STROKEMITERLIMIT(attval)) |
2415 | 0 | { |
2416 | 0 | CheckNumber(doc, node, attval); |
2417 | 0 | } |
2418 | 0 | else if (attrIsSVG_STROKEWIDTH(attval)) |
2419 | 0 | { |
2420 | 0 | CheckLength(doc, node, attval); |
2421 | 0 | } |
2422 | 0 | else if (attrIsSVG_COLORINTERPOLATION(attval)) |
2423 | 0 | { |
2424 | 0 | static ctmbstr const values[] = {"auto", "sRGB", "linearRGB", NULL}; |
2425 | |
|
2426 | 0 | if (AttrValueIsAmong(attval, values)) |
2427 | 0 | CheckLowerCaseAttrValue(doc, node, attval); |
2428 | 0 | else |
2429 | 0 | TY_(ReportAttrError)(doc, node, attval, BAD_ATTRIBUTE_VALUE); |
2430 | 0 | } |
2431 | 0 | else if (attrIsSVG_COLORRENDERING(attval)) |
2432 | 0 | { |
2433 | 0 | static ctmbstr const values[] = { |
2434 | 0 | "auto", "optimizeSpeed", "optimizeQuality", NULL}; |
2435 | |
|
2436 | 0 | if (AttrValueIsAmong(attval, values)) |
2437 | 0 | CheckLowerCaseAttrValue(doc, node, attval); |
2438 | 0 | else |
2439 | 0 | TY_(ReportAttrError)(doc, node, attval, BAD_ATTRIBUTE_VALUE); |
2440 | 0 | } |
2441 | 0 | else if(attrIsSVG_OPACITY(attval)) |
2442 | 0 | { |
2443 | 0 | CheckDecimal(doc, node, attval); |
2444 | 0 | } |
2445 | 0 | else if(attrIsSVG_STROKEOPACITY(attval)) |
2446 | 0 | { |
2447 | 0 | CheckDecimal(doc, node, attval); |
2448 | 0 | } |
2449 | 0 | else if(attrIsSVG_FILLOPACITY(attval)) |
2450 | 0 | { |
2451 | 0 | CheckDecimal(doc, node, attval); |
2452 | 0 | } |
2453 | 23 | } |
2454 | 23 | } |
2455 | | |
2456 | | static |
2457 | | AttVal *SortAttVal( TidyDocImpl* doc, AttVal* list, TidyAttrSortStrategy strat ); |
2458 | | |
2459 | | void TY_(SortAttributes)(TidyDocImpl* doc, Node* node, TidyAttrSortStrategy strat) |
2460 | 153k | { |
2461 | 447k | while (node) |
2462 | 294k | { |
2463 | 294k | node->attributes = SortAttVal( doc, node->attributes, strat ); |
2464 | 294k | if (node->content) |
2465 | 153k | TY_(SortAttributes)(doc, node->content, strat); |
2466 | 294k | node = node->next; |
2467 | 294k | } |
2468 | 153k | } |
2469 | | |
2470 | | /** |
2471 | | * Attribute sorting contributed by Adrian Wilkins, 2007 |
2472 | | * |
2473 | | * Portions copyright Simon Tatham 2001. |
2474 | | * |
2475 | | * Merge sort algorithm adapted from listsort.c linked from |
2476 | | * https://www.chiark.greenend.org.uk/~sgtatham/algorithms/listsort.html |
2477 | | * |
2478 | | * Original copyright notice proceeds below. |
2479 | | * |
2480 | | * Permission is hereby granted, free of charge, to any person |
2481 | | * obtaining a copy of this software and associated documentation |
2482 | | * files (the "Software"), to deal in the Software without |
2483 | | * restriction, including without limitation the rights to use, |
2484 | | * copy, modify, merge, publish, distribute, sublicense, and/or |
2485 | | * sell copies of the Software, and to permit persons to whom the |
2486 | | * Software is furnished to do so, subject to the following |
2487 | | * conditions: |
2488 | | * |
2489 | | * The above copyright notice and this permission notice shall be |
2490 | | * included in all copies or substantial portions of the Software. |
2491 | | * |
2492 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
2493 | | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES |
2494 | | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
2495 | | * NONINFRINGEMENT. IN NO EVENT SHALL SIMON TATHAM BE LIABLE FOR |
2496 | | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF |
2497 | | * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
2498 | | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
2499 | | * SOFTWARE. |
2500 | | */ |
2501 | | |
2502 | | typedef int(*ptAttValComparator)(AttVal *one, AttVal *two, ctmbstr *list); |
2503 | | |
2504 | | /* Returns the index of the item in the array, or -1 if not in the array */ |
2505 | | static |
2506 | | int indexof( ctmbstr item, ctmbstr *list ) |
2507 | 0 | { |
2508 | 0 | if ( list ) |
2509 | 0 | { |
2510 | 0 | uint i = 0; |
2511 | 0 | while ( list[i] != NULL ) { |
2512 | 0 | if ( TY_(tmbstrcasecmp)(item, list[i]) == 0 ) |
2513 | 0 | return i; |
2514 | 0 | i++; |
2515 | 0 | } |
2516 | 0 | } |
2517 | | |
2518 | 0 | return -1; |
2519 | 0 | } |
2520 | | |
2521 | | /* Comparison function for TidySortAttrAlpha. Will also consider items in |
2522 | | the passed in list as higher-priority, and will group them first. |
2523 | | */ |
2524 | | static |
2525 | | int AlphaComparator(AttVal *one, AttVal *two, ctmbstr *list) |
2526 | 0 | { |
2527 | 0 | int oneIndex = indexof( one->attribute, list ); |
2528 | 0 | int twoIndex = indexof( two->attribute, list ); |
2529 | | |
2530 | | /* If both on the list, the lower index has priority. */ |
2531 | 0 | if ( oneIndex >= 0 && twoIndex >= 0 ) |
2532 | 0 | return oneIndex < twoIndex ? -1 : 1; |
2533 | | |
2534 | | /* If A on the list but B not on the list, then A has priority. */ |
2535 | 0 | if ( oneIndex >= 0 && twoIndex == -1 ) |
2536 | 0 | return -1; |
2537 | | |
2538 | | /* If A not on the list but B is on the list, then B has priority. */ |
2539 | 0 | if ( oneIndex == -1 && twoIndex >= 0 ) |
2540 | 0 | return 1; |
2541 | | |
2542 | | /* Otherwise nothing is on the list, so just compare strings. */ |
2543 | 0 | return TY_(tmbstrcmp)(one->attribute, two->attribute); |
2544 | 0 | } |
2545 | | |
2546 | | |
2547 | | /* Comparison function for prioritizing list items. It doesn't otherwise |
2548 | | sort. |
2549 | | */ |
2550 | | static |
2551 | | int PriorityComparator(AttVal *one, AttVal *two, ctmbstr *list) |
2552 | 0 | { |
2553 | 0 | int oneIndex = indexof( one->attribute, list ); |
2554 | 0 | int twoIndex = indexof( two->attribute, list ); |
2555 | | |
2556 | | /* If both on the list, the lower index has priority. */ |
2557 | 0 | if ( oneIndex >= 0 && twoIndex >= 0 ) |
2558 | 0 | return oneIndex < twoIndex ? -1 : 1; |
2559 | | |
2560 | | /* If A on the list but B not on the list, then A has priority. */ |
2561 | 0 | if ( oneIndex >= 0 && twoIndex == -1 ) |
2562 | 0 | return -1; |
2563 | | |
2564 | | /* If A not on the list but B is on the list, then B has priority. */ |
2565 | 0 | if ( oneIndex == -1 && twoIndex >= 0 ) |
2566 | 0 | return 1; |
2567 | | |
2568 | | /* Otherwise nothing is on the list, so just mark them as the same. */ |
2569 | 0 | return 0; |
2570 | 0 | } |
2571 | | |
2572 | | |
2573 | | /* The "factory method" that returns a pointer to the comparator function */ |
2574 | | static |
2575 | | ptAttValComparator GetAttValComparator(TidyAttrSortStrategy strat, ctmbstr *list) |
2576 | 294k | { |
2577 | 294k | switch (strat) |
2578 | 294k | { |
2579 | 0 | case TidySortAttrAlpha: |
2580 | 0 | return AlphaComparator; |
2581 | 294k | case TidySortAttrNone: |
2582 | 294k | if ( list && list[0] ) |
2583 | 0 | return PriorityComparator; |
2584 | 294k | break; |
2585 | 294k | } |
2586 | 294k | return 0; |
2587 | 294k | } |
2588 | | |
2589 | | /* The sort routine */ |
2590 | | static |
2591 | | AttVal *SortAttVal( TidyDocImpl* doc, AttVal *list, TidyAttrSortStrategy strat) |
2592 | 294k | { |
2593 | | /* Get the list from the passed-in tidyDoc. */ |
2594 | 294k | ctmbstr* priorityList = (ctmbstr*)doc->attribs.priorityAttribs.list; |
2595 | | |
2596 | 294k | ptAttValComparator ptComparator = GetAttValComparator(strat, priorityList); |
2597 | 294k | AttVal *p, *q, *e, *tail; |
2598 | 294k | int insize, nmerges, psize, qsize, i; |
2599 | | |
2600 | | /* |
2601 | | * Silly special case: if `list' was passed in as NULL, return |
2602 | | * NULL immediately. |
2603 | | */ |
2604 | 294k | if (!list) |
2605 | 270k | return NULL; |
2606 | | |
2607 | | /* If no comparator, return the list as-is */ |
2608 | 23.4k | if (ptComparator == 0) |
2609 | 23.4k | return list; |
2610 | | |
2611 | 0 | insize = 1; |
2612 | |
|
2613 | 0 | while (1) { |
2614 | 0 | p = list; |
2615 | 0 | list = NULL; |
2616 | 0 | tail = NULL; |
2617 | |
|
2618 | 0 | nmerges = 0; /* count number of merges we do in this pass */ |
2619 | |
|
2620 | 0 | while (p) { |
2621 | 0 | nmerges++; /* there exists a merge to be done */ |
2622 | | /* step `insize' places along from p */ |
2623 | 0 | q = p; |
2624 | 0 | psize = 0; |
2625 | 0 | for (i = 0; i < insize; i++) { |
2626 | 0 | psize++; |
2627 | 0 | q = q->next; |
2628 | 0 | if(!q) break; |
2629 | 0 | } |
2630 | | |
2631 | | /* if q hasn't fallen off end, we have two lists to merge */ |
2632 | 0 | qsize = insize; |
2633 | | |
2634 | | /* now we have two lists; merge them */ |
2635 | 0 | while (psize > 0 || (qsize > 0 && q)) { |
2636 | | |
2637 | | /* decide whether next element of merge comes from p or q */ |
2638 | 0 | if (psize == 0) { |
2639 | | /* p is empty; e must come from q. */ |
2640 | 0 | e = q; q = q->next; qsize--; |
2641 | 0 | } else if (qsize == 0 || !q) { |
2642 | | /* q is empty; e must come from p. */ |
2643 | 0 | e = p; p = p->next; psize--; |
2644 | 0 | } else if (ptComparator(p,q, priorityList) <= 0) { |
2645 | | /* First element of p is lower (or same); |
2646 | | * e must come from p. */ |
2647 | 0 | e = p; p = p->next; psize--; |
2648 | 0 | } else { |
2649 | | /* First element of q is lower; e must come from q. */ |
2650 | 0 | e = q; q = q->next; qsize--; |
2651 | 0 | } |
2652 | | |
2653 | | /* add the next element to the merged list */ |
2654 | 0 | if (tail) { |
2655 | 0 | tail->next = e; |
2656 | 0 | } else { |
2657 | 0 | list = e; |
2658 | 0 | } |
2659 | |
|
2660 | 0 | tail = e; |
2661 | 0 | } |
2662 | | |
2663 | | /* now p has stepped `insize' places along, and q has too */ |
2664 | 0 | p = q; |
2665 | 0 | } |
2666 | |
|
2667 | 0 | tail->next = NULL; |
2668 | | |
2669 | | /* If we have done only one merge, we're finished. */ |
2670 | 0 | if (nmerges <= 1) /* allow for nmerges==0, the empty list case */ |
2671 | 0 | return list; |
2672 | | |
2673 | | /* Otherwise repeat, merging lists twice the size */ |
2674 | 0 | insize *= 2; |
2675 | 0 | } |
2676 | 0 | } |
2677 | | |
2678 | | /* RDFA support checkers |
2679 | | * |
2680 | | */ |
2681 | | |
2682 | | /* CheckRDFAPrefix - ensure the prefix attribute value is |
2683 | | * correct |
2684 | | * |
2685 | | * @prefix takes prefix value pairs in the form: |
2686 | | * |
2687 | | * NCName ':' ' '+ AnyURI |
2688 | | */ |
2689 | | |
2690 | | void CheckRDFaPrefix ( TidyDocImpl* doc, Node *node, AttVal *attval) |
2691 | 0 | { |
2692 | 0 | if (!AttrHasValue(attval)) |
2693 | 0 | { |
2694 | 0 | TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE); |
2695 | 0 | return; |
2696 | 0 | } |
2697 | | |
2698 | | /* Copy the attribute value so we can split it */ |
2699 | 0 | if (attval->value) { |
2700 | 0 | tmbstr t, tPtr ; |
2701 | |
|
2702 | 0 | uint prefixCount = 0; |
2703 | | /* isPrefix toggles - start at 1 and change to 0 as we |
2704 | | * iterate over the components of the value */ |
2705 | 0 | uint isPrefix = 1; |
2706 | | |
2707 | | /* Copy it over */ |
2708 | |
|
2709 | 0 | uint len = TY_(tmbstrlen)(attval->value); |
2710 | 0 | tmbstr s = (tmbstr) TidyDocAlloc( doc, len + 1 ); |
2711 | 0 | s[0] = '\0'; |
2712 | 0 | TY_(tmbstrcpy)( s, attval->value ); |
2713 | | |
2714 | | /* iterate over value */ |
2715 | 0 | tPtr = s; |
2716 | |
|
2717 | 0 | while ( ( t = strtok(tPtr, " ") ) != NULL ) { |
2718 | 0 | tPtr = NULL; |
2719 | 0 | if (isPrefix) { |
2720 | | /* this piece should be a prefix */ |
2721 | | /* prefix rules are that it can have any |
2722 | | * character except a colon - that one must be |
2723 | | * at the end */ |
2724 | 0 | tmbstr i = strchr(t, ':') ; |
2725 | 0 | if (i == NULL) { |
2726 | | /* no colon - bad! */ |
2727 | 0 | TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE); |
2728 | 0 | } else if (i != ( t + TY_(tmbstrlen)(t) - 1) ) { |
2729 | | /* not at the end - also bad */ |
2730 | 0 | TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE); |
2731 | 0 | } |
2732 | 0 | } else { |
2733 | | /* this piece should be a URL */ |
2734 | 0 | prefixCount ++; |
2735 | 0 | } |
2736 | 0 | isPrefix = !isPrefix; |
2737 | 0 | } |
2738 | 0 | TidyDocFree( doc, s ) ; |
2739 | 0 | } |
2740 | 0 | } |
2741 | | |
2742 | | /* CheckRDFaTerm - are terms valid |
2743 | | * |
2744 | | */ |
2745 | | |
2746 | | void CheckRDFaTerm ( TidyDocImpl* doc, Node *node, AttVal *attval) |
2747 | 0 | { |
2748 | 0 | if (!AttrHasValue(attval)) |
2749 | 0 | { |
2750 | 0 | TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE); |
2751 | 0 | return; |
2752 | 0 | } |
2753 | |
|
2754 | 0 | } |
2755 | | |
2756 | | /* CheckRDFaSafeCURIE - is a CURIE legal |
2757 | | * |
2758 | | */ |
2759 | | |
2760 | | void CheckRDFaSafeCURIE ( TidyDocImpl* doc, Node *node, AttVal *attval) |
2761 | 0 | { |
2762 | 0 | if (!AttrHasValue(attval)) |
2763 | 0 | { |
2764 | 0 | TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE); |
2765 | 0 | return; |
2766 | 0 | } |
2767 | |
|
2768 | 0 | } |
2769 | | |
2770 | | /* |
2771 | | * local variables: |
2772 | | * mode: c |
2773 | | * indent-tabs-mode: nil |
2774 | | * c-basic-offset: 4 |
2775 | | * eval: (c-set-offset 'substatement-open 0) |
2776 | | * end: |
2777 | | */ |