/src/xmlProtoConverter.cpp
Line | Count | Source |
1 | | /* |
2 | | * Copyright (C) 2019 Google Inc. |
3 | | * |
4 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | | * you may not use this file except in compliance with the License. |
6 | | * You may obtain a copy of the License at |
7 | | * |
8 | | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | | * |
10 | | * Unless required by applicable law or agreed to in writing, software |
11 | | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | | * See the License for the specific language governing permissions and |
14 | | * limitations under the License. |
15 | | */ |
16 | | |
17 | | #include "xmlProtoConverter.h" |
18 | | |
19 | | #include <algorithm> |
20 | | |
21 | | using namespace std; |
22 | | using namespace xmlProtoFuzzer; |
23 | | |
24 | | string ProtoConverter::removeNonAscii(string const& _utf8) |
25 | 688k | { |
26 | 688k | string asciiStr{_utf8}; |
27 | 18.5M | asciiStr.erase(remove_if(asciiStr.begin(), asciiStr.end(), [=](char c) -> bool { |
28 | 18.5M | return !(std::isalpha(c) || std::isdigit(c)); |
29 | 18.5M | }), asciiStr.end()); |
30 | 688k | return asciiStr.empty() ? "fuzz" : asciiStr; |
31 | 688k | } |
32 | | |
33 | | |
34 | | void ProtoConverter::visit(Misc const& _x) |
35 | 3.59k | { |
36 | 3.59k | switch (_x.misc_oneof_case()) |
37 | 3.59k | { |
38 | 1.60k | case Misc::kComment: |
39 | 1.60k | m_output << "<!--" << _x.comment() << "-->\n"; |
40 | 1.60k | break; |
41 | 1.14k | case Misc::kInst: |
42 | 1.14k | visit(_x.inst()); |
43 | 1.14k | break; |
44 | 849 | case Misc::MISC_ONEOF_NOT_SET: |
45 | 849 | break; |
46 | 3.59k | } |
47 | 3.59k | } |
48 | | |
49 | | void ProtoConverter::visit(Prolog const& _x) |
50 | 7.06k | { |
51 | 7.06k | visit(_x.decl()); |
52 | 7.06k | visit(_x.doctype()); |
53 | 7.06k | for (auto const& misc: _x.misc()) |
54 | 2.22k | visit(misc); |
55 | 7.06k | } |
56 | | |
57 | | void ProtoConverter::visit(KeyValue const& _x) |
58 | 18.5k | { |
59 | 18.5k | if (!KeyValue::XmlNamespace_IsValid(_x.type())) |
60 | 247 | return; |
61 | | |
62 | 18.3k | switch (_x.type()) |
63 | 18.3k | { |
64 | 5.31k | case KeyValue::ATTRIBUTES: |
65 | 5.31k | m_output << "xml:attributes=\"" << removeNonAscii(_x.value()) << "\" "; |
66 | 5.31k | break; |
67 | 1.21k | case KeyValue::BASE: |
68 | 1.21k | m_output << "xml:base=\"" << removeNonAscii(_x.value()) << "\" "; |
69 | 1.21k | break; |
70 | 1.10k | case KeyValue::CATALOG: |
71 | 1.10k | m_output << "xml:catalog=\"" << removeNonAscii(_x.value()) << "\" "; |
72 | 1.10k | break; |
73 | 1.62k | case KeyValue::ID: |
74 | 1.62k | m_output << "xml:id=\"" << removeNonAscii(_x.value()) << "\" "; |
75 | 1.62k | break; |
76 | 573 | case KeyValue::LANG: |
77 | 573 | m_output << "xml:lang=\"" << removeNonAscii(_x.value()) << "\" "; |
78 | 573 | break; |
79 | 349 | case KeyValue::LINK: |
80 | 349 | m_output << "xml:link=\"" << removeNonAscii(_x.value()) << "\" "; |
81 | 349 | break; |
82 | 741 | case KeyValue::SPACE: |
83 | 741 | m_output << "xml:space=\"" << removeNonAscii(_x.value()) << "\" "; |
84 | 741 | break; |
85 | 300 | case KeyValue::SPECIAL: |
86 | 300 | m_output << "xml:special=\"" << removeNonAscii(_x.value()) << "\" "; |
87 | 300 | break; |
88 | 1.28k | case KeyValue::TEST: |
89 | 1.28k | m_output << "xml:test=\"" << removeNonAscii(_x.value()) << "\" "; |
90 | 1.28k | break; |
91 | 5.83k | case KeyValue::FUZZ: |
92 | 5.83k | if (_x.ByteSizeLong() % 2) |
93 | 1.35k | m_output << "xmlns:" << removeNonAscii(_x.key()) << "=\"" << removeNonAscii(_x.value()) << "\" "; |
94 | 4.47k | else |
95 | 4.47k | m_output << removeNonAscii(_x.key()) << "=\"" << removeNonAscii(_x.value()) << "\" "; |
96 | 5.83k | break; |
97 | 0 | case KeyValue_XmlNamespace_KeyValue_XmlNamespace_INT_MIN_SENTINEL_DO_NOT_USE_: |
98 | 0 | case KeyValue_XmlNamespace_KeyValue_XmlNamespace_INT_MAX_SENTINEL_DO_NOT_USE_: |
99 | 0 | break; |
100 | 18.3k | } |
101 | 18.3k | } |
102 | | |
103 | | void ProtoConverter::visit(ProcessingInstruction const& _x) |
104 | 1.14k | { |
105 | 1.14k | m_output << "<?" << removeNonAscii(_x.name()) << " "; |
106 | 1.14k | for (auto const& prop: _x.kv()) |
107 | 1.30k | visit(prop); |
108 | 1.14k | m_output << "?>\n"; |
109 | 1.14k | } |
110 | | |
111 | | void ProtoConverter::visit(Content const& _x) |
112 | 133k | { |
113 | 133k | switch (_x.content_oneof_case()) |
114 | 133k | { |
115 | 2.28k | case Content::kStr: |
116 | 2.28k | m_output << _x.str() << "\n"; |
117 | 2.28k | break; |
118 | 2.01k | case Content::kE: |
119 | 2.01k | visit(_x.e()); |
120 | 2.01k | m_output << "\n"; |
121 | 2.01k | break; |
122 | 2.15k | case Content::kC: |
123 | 2.15k | visit(_x.c()); |
124 | 2.15k | m_output << "\n"; |
125 | 2.15k | break; |
126 | 126k | case Content::CONTENT_ONEOF_NOT_SET: |
127 | 126k | break; |
128 | 133k | } |
129 | 133k | } |
130 | | |
131 | | void ProtoConverter::visit(ElementDecl const& _x) |
132 | 4.29k | { |
133 | 4.29k | if (!ElementDecl::ContentSpec_IsValid(_x.spec())) |
134 | 231 | return; |
135 | | |
136 | 4.05k | m_output << "<!ELEMENT " << _x.name() << " "; |
137 | 4.05k | switch (_x.spec()) |
138 | 4.05k | { |
139 | 2.12k | case ElementDecl::EMPTY: |
140 | 2.12k | m_output << "EMPTY>"; |
141 | 2.12k | break; |
142 | 157 | case ElementDecl::ANY: |
143 | 157 | m_output << "ANY>"; |
144 | 157 | break; |
145 | 215 | case ElementDecl::FUZZ: |
146 | 215 | m_output << "FUZZ>"; |
147 | 215 | break; |
148 | 856 | case ElementDecl::MIXED: |
149 | 856 | m_output << "(#PCDATA"; |
150 | 856 | for (auto const& pcdata: _x.cdata()) |
151 | 1.91k | m_output << "|" << pcdata; |
152 | 856 | m_output << ")"; |
153 | 856 | if (_x.cdata_size() > 0) |
154 | 595 | m_output << "*"; |
155 | 856 | m_output << ">"; |
156 | 856 | break; |
157 | 709 | case ElementDecl::CHILDREN: |
158 | 709 | { |
159 | 709 | m_output << "("; |
160 | 709 | string delim = ""; |
161 | 1.80k | for (auto const& str: _x.cdata()) { |
162 | 1.80k | m_output << delim << removeNonAscii(str); |
163 | 1.80k | delim = ", "; |
164 | 1.80k | } |
165 | 709 | m_output << ")>"; |
166 | 709 | break; |
167 | 0 | } |
168 | 0 | case ElementDecl_ContentSpec_ElementDecl_ContentSpec_INT_MIN_SENTINEL_DO_NOT_USE_: |
169 | 0 | case ElementDecl_ContentSpec_ElementDecl_ContentSpec_INT_MAX_SENTINEL_DO_NOT_USE_: |
170 | 0 | break; |
171 | 4.05k | } |
172 | 4.05k | } |
173 | | |
174 | | void ProtoConverter::visit(AttValue const& _x) |
175 | 3.51k | { |
176 | 3.51k | if (!isValid(_x)) |
177 | 133 | return; |
178 | | |
179 | 3.38k | m_output << "\""; |
180 | 3.38k | string prefix; |
181 | 3.38k | switch (_x.type()) |
182 | 3.38k | { |
183 | 1.88k | case AttValue::ENTITY: |
184 | 1.88k | prefix = "&"; |
185 | 1.88k | break; |
186 | 1.16k | case AttValue::CHAR: |
187 | 1.16k | if (_x.ByteSizeLong() % 2) |
188 | 147 | prefix = "&#"; |
189 | 1.01k | else |
190 | | // TODO: Value that follows this must be a |
191 | | // sequence of hex digits. |
192 | 1.01k | prefix = "&#x"; |
193 | 1.16k | break; |
194 | 339 | case AttValue::FUZZ: |
195 | 339 | prefix = "fuzz"; |
196 | 339 | break; |
197 | 0 | case AttValue_Type_AttValue_Type_INT_MIN_SENTINEL_DO_NOT_USE_: |
198 | 0 | case AttValue_Type_AttValue_Type_INT_MAX_SENTINEL_DO_NOT_USE_: |
199 | 0 | break; |
200 | 3.38k | } |
201 | 3.38k | for (auto const& name: _x.value()) |
202 | 5.08k | m_output << prefix << removeNonAscii(name) << ";"; |
203 | 3.38k | m_output << "\""; |
204 | 3.38k | } |
205 | | |
206 | | void ProtoConverter::visit(DefaultDecl const& _x) |
207 | 15.8k | { |
208 | 15.8k | if (!isValid(_x)) |
209 | 147 | return; |
210 | | |
211 | 15.7k | switch (_x.type()) |
212 | 15.7k | { |
213 | 11.6k | case DefaultDecl::REQUIRED: |
214 | 11.6k | m_output << "#REQUIRED"; |
215 | 11.6k | break; |
216 | 327 | case DefaultDecl::IMPLIED: |
217 | 327 | m_output << "#IMPLIED"; |
218 | 327 | break; |
219 | 3.51k | case DefaultDecl::FIXED: |
220 | 3.51k | m_output << "#FIXED "; |
221 | 3.51k | visit(_x.att()); |
222 | 3.51k | break; |
223 | 262 | case DefaultDecl::FUZZ: |
224 | 262 | m_output << "#FUZZ"; |
225 | 262 | break; |
226 | 0 | case DefaultDecl_Type_DefaultDecl_Type_INT_MIN_SENTINEL_DO_NOT_USE_: |
227 | 0 | case DefaultDecl_Type_DefaultDecl_Type_INT_MAX_SENTINEL_DO_NOT_USE_: |
228 | 0 | break; |
229 | 15.7k | } |
230 | 15.7k | } |
231 | | |
232 | | void ProtoConverter::visit(AttDef const& _x) |
233 | 15.9k | { |
234 | 15.9k | if (!isValid(_x)) |
235 | 66 | return; |
236 | | |
237 | 15.8k | m_output << " " << removeNonAscii(_x.name()) << " "; |
238 | 15.8k | switch (_x.type()) |
239 | 15.8k | { |
240 | 9.47k | case AttDef::CDATA: |
241 | 9.47k | m_output << "CDATA "; |
242 | 9.47k | break; |
243 | 1.61k | case AttDef::ID: |
244 | 1.61k | m_output << "ID "; |
245 | 1.61k | break; |
246 | 2.09k | case AttDef::IDREF: |
247 | 2.09k | m_output << "IDREF "; |
248 | 2.09k | break; |
249 | 760 | case AttDef::IDREFS: |
250 | 760 | m_output << "IDREFS "; |
251 | 760 | break; |
252 | 510 | case AttDef::ENTITY: |
253 | 510 | m_output << "ENTITY "; |
254 | 510 | break; |
255 | 302 | case AttDef::ENTITIES: |
256 | 302 | m_output << "ENTITIES "; |
257 | 302 | break; |
258 | 470 | case AttDef::NMTOKEN: |
259 | 470 | m_output << "NMTOKEN "; |
260 | 470 | break; |
261 | 529 | case AttDef::NMTOKENS: |
262 | 529 | m_output << "NMTOKENS "; |
263 | 529 | break; |
264 | 110 | case AttDef::FUZZ: |
265 | 110 | m_output << "FUZZ "; |
266 | 110 | break; |
267 | 0 | case AttDef_Type_AttDef_Type_INT_MIN_SENTINEL_DO_NOT_USE_: |
268 | 0 | case AttDef_Type_AttDef_Type_INT_MAX_SENTINEL_DO_NOT_USE_: |
269 | 0 | break; |
270 | 15.8k | } |
271 | 15.8k | visit(_x.def()); |
272 | 15.8k | } |
273 | | |
274 | | void ProtoConverter::visit(AttListDecl const& _x) |
275 | 3.31k | { |
276 | 3.31k | m_output << "<!ATTLIST " << removeNonAscii(_x.name()); |
277 | 3.31k | for (auto const& att: _x.attdefs()) |
278 | 15.9k | visit(att); |
279 | 3.31k | m_output << ">"; |
280 | 3.31k | } |
281 | | |
282 | | void ProtoConverter::visit(NotationDecl const& _x) |
283 | 3.92k | { |
284 | 3.92k | m_output << "<!NOTATION " << removeNonAscii(_x.name()) << " "; |
285 | 3.92k | switch (_x.notation_oneof_case()) |
286 | 3.92k | { |
287 | 1.00k | case NotationDecl::kExt: |
288 | 1.00k | visit(_x.ext()); |
289 | 1.00k | break; |
290 | 1.93k | case NotationDecl::kPub: |
291 | 1.93k | m_output << "PUBLIC " << _x.pub(); |
292 | 1.93k | break; |
293 | 239 | case NotationDecl::kFuzz: |
294 | 239 | m_output << "FUZZ " << _x.fuzz(); |
295 | 239 | break; |
296 | 747 | case NotationDecl::NOTATION_ONEOF_NOT_SET: |
297 | 747 | break; |
298 | 3.92k | } |
299 | 3.92k | m_output << ">"; |
300 | 3.92k | } |
301 | | |
302 | | void ProtoConverter::visit(NDataDecl const& _x) |
303 | 1.57k | { |
304 | 1.57k | m_output << " NDATA " << _x.name(); |
305 | 1.57k | } |
306 | | |
307 | | void ProtoConverter::visit(EntityDef const& _x) |
308 | 5.62k | { |
309 | 5.62k | switch (_x.entity_oneof_case()) |
310 | 5.62k | { |
311 | 2.15k | case EntityDef::kExt: |
312 | 2.15k | visit(_x.ext()); |
313 | 2.15k | if (_x.ByteSizeLong() % 2) |
314 | 1.57k | visit(_x.ndata()); |
315 | 2.15k | break; |
316 | 1.90k | case EntityDef::kVal: |
317 | 1.90k | visit(_x.val()); |
318 | 1.90k | break; |
319 | 1.56k | case EntityDef::ENTITY_ONEOF_NOT_SET: |
320 | 1.56k | break; |
321 | 5.62k | } |
322 | 5.62k | } |
323 | | |
324 | | void ProtoConverter::visit(PEDef const& _x) |
325 | 3.43k | { |
326 | 3.43k | switch (_x.pedef_oneof_case()) |
327 | 3.43k | { |
328 | 1.46k | case PEDef::kVal: |
329 | 1.46k | visit(_x.val()); |
330 | 1.46k | break; |
331 | 1.14k | case PEDef::kId: |
332 | 1.14k | visit(_x.id()); |
333 | 1.14k | break; |
334 | 825 | case PEDef::PEDEF_ONEOF_NOT_SET: |
335 | 825 | break; |
336 | 3.43k | } |
337 | 3.43k | } |
338 | | |
339 | | void ProtoConverter::visit(EntityValue const& _x) |
340 | 3.36k | { |
341 | 3.36k | if (!isValid(_x)) |
342 | 97 | return; |
343 | | |
344 | 3.27k | m_output << "\""; |
345 | 3.27k | string prefix; |
346 | 3.27k | switch (_x.type()) |
347 | 3.27k | { |
348 | 1.47k | case EntityValue::ENTITY: |
349 | 1.47k | prefix = "&"; |
350 | 1.47k | break; |
351 | 1.33k | case EntityValue::CHAR: |
352 | 1.33k | if (_x.ByteSizeLong() % 2) |
353 | 920 | prefix = "&#"; |
354 | 417 | else |
355 | 417 | prefix = "&#x"; |
356 | 1.33k | break; |
357 | 83 | case EntityValue::PEREF: |
358 | 83 | prefix = "%"; |
359 | 83 | break; |
360 | 378 | case EntityValue::FUZZ: |
361 | 378 | prefix = "fuzz"; |
362 | 378 | break; |
363 | 0 | case EntityValue_Type_EntityValue_Type_INT_MIN_SENTINEL_DO_NOT_USE_: |
364 | 0 | case EntityValue_Type_EntityValue_Type_INT_MAX_SENTINEL_DO_NOT_USE_: |
365 | 0 | break; |
366 | 3.27k | } |
367 | 3.27k | for (auto const& ref: _x.name()) |
368 | 6.87k | m_output << prefix << ref << ";"; |
369 | 3.27k | m_output << "\""; |
370 | 3.27k | } |
371 | | |
372 | | void ProtoConverter::visit(EntityDecl const& _x) |
373 | 9.14k | { |
374 | 9.14k | if (!isValid(_x)) |
375 | 86 | return; |
376 | | |
377 | 9.05k | m_output << "<!ENTITY "; |
378 | 9.05k | switch (_x.type()) |
379 | 9.05k | { |
380 | 5.62k | case EntityDecl::GEDECL: |
381 | 5.62k | m_output << _x.name() << " "; |
382 | 5.62k | visit(_x.ent()); |
383 | 5.62k | break; |
384 | 3.43k | case EntityDecl::PEDECL: |
385 | 3.43k | m_output << "% " << _x.name() << " "; |
386 | 3.43k | visit(_x.pedef()); |
387 | 3.43k | break; |
388 | 0 | case EntityDecl_Type_EntityDecl_Type_INT_MIN_SENTINEL_DO_NOT_USE_: |
389 | 0 | case EntityDecl_Type_EntityDecl_Type_INT_MAX_SENTINEL_DO_NOT_USE_: |
390 | 0 | break; |
391 | 9.05k | } |
392 | 9.05k | m_output << ">"; |
393 | 9.05k | } |
394 | | |
395 | | void ProtoConverter::visit(ConditionalSect const& _x) |
396 | 6.72k | { |
397 | 6.72k | if (!isValid(_x)) |
398 | 70 | return; |
399 | | |
400 | 6.65k | switch (_x.type()) |
401 | 6.65k | { |
402 | 4.61k | case ConditionalSect::INCLUDE: |
403 | 4.61k | m_output << "<![ INCLUDE ["; |
404 | 4.61k | visit(_x.ext()); |
405 | 4.61k | m_output << "]]>"; |
406 | 4.61k | break; |
407 | 396 | case ConditionalSect::IGNORE: |
408 | 396 | m_output << "<![ IGNORE ["; |
409 | 396 | for (auto const& str: _x.ignores()) |
410 | 652 | m_output << "<![" << removeNonAscii(str) << "]]>"; |
411 | 396 | m_output << "]]>"; |
412 | 396 | break; |
413 | 1.63k | case ConditionalSect::FUZZ: |
414 | 1.63k | m_output << "<![ FUZZ ["; |
415 | 1.63k | visit(_x.ext()); |
416 | 1.63k | m_output << "]]>"; |
417 | 1.63k | break; |
418 | 0 | case ConditionalSect_Type_ConditionalSect_Type_INT_MIN_SENTINEL_DO_NOT_USE_: |
419 | 0 | case ConditionalSect_Type_ConditionalSect_Type_INT_MAX_SENTINEL_DO_NOT_USE_: |
420 | 0 | break; |
421 | 6.65k | } |
422 | 6.65k | } |
423 | | |
424 | | |
425 | | void ProtoConverter::visit(OneExtSubsetDecl const& _x) |
426 | 26.9k | { |
427 | 26.9k | switch (_x.extsubset_oneof_case()) |
428 | 26.9k | { |
429 | 13.5k | case OneExtSubsetDecl::kM: |
430 | 13.5k | visit(_x.m()); |
431 | 13.5k | break; |
432 | 6.72k | case OneExtSubsetDecl::kC: |
433 | 6.72k | visit(_x.c()); |
434 | 6.72k | break; |
435 | 6.68k | case OneExtSubsetDecl::EXTSUBSET_ONEOF_NOT_SET: |
436 | 6.68k | break; |
437 | 26.9k | } |
438 | 26.9k | } |
439 | | |
440 | | |
441 | | void ProtoConverter::visit(ExtSubsetDecl const& _x) |
442 | 9.89k | { |
443 | 9.89k | for (auto const& decl: _x.decls()) |
444 | 26.9k | visit(decl); |
445 | 9.89k | } |
446 | | |
447 | | void ProtoConverter::visit(CData const& _x) |
448 | 2.15k | { |
449 | 2.15k | m_output << "<![CDATA[" << removeNonAscii(_x.data()) << "]]>"; |
450 | 2.15k | } |
451 | | |
452 | | void ProtoConverter::visit(MarkupDecl const& _x) |
453 | 29.1k | { |
454 | 29.1k | switch (_x.markup_oneof_case()) |
455 | 29.1k | { |
456 | 4.29k | case MarkupDecl::kE: |
457 | 4.29k | visit(_x.e()); |
458 | 4.29k | break; |
459 | 3.31k | case MarkupDecl::kA: |
460 | 3.31k | visit(_x.a()); |
461 | 3.31k | break; |
462 | 3.92k | case MarkupDecl::kN: |
463 | 3.92k | visit(_x.n()); |
464 | 3.92k | break; |
465 | 1.36k | case MarkupDecl::kM: |
466 | 1.36k | visit(_x.m()); |
467 | 1.36k | break; |
468 | 9.14k | case MarkupDecl::kEntity: |
469 | 9.14k | visit(_x.entity()); |
470 | 9.14k | break; |
471 | 3.63k | case MarkupDecl::kExt: |
472 | 3.63k | visit(_x.ext()); |
473 | 3.63k | break; |
474 | 3.51k | case MarkupDecl::MARKUP_ONEOF_NOT_SET: |
475 | 3.51k | break; |
476 | 29.1k | } |
477 | 29.1k | } |
478 | | |
479 | | /// Returns predefined element from an Element_Id enum |
480 | | /// @param _x is an enum that holds the desired type of predefined value |
481 | | /// @param _prop is a string that holds the value of the desired type |
482 | | /// @return string holding the predefined value of the form |
483 | | /// name attribute=\"value\" |
484 | | string ProtoConverter::getPredefined(Element_Id _x, string const& _prop) |
485 | 121k | { |
486 | 121k | string output{}; |
487 | 121k | switch (_x) |
488 | 121k | { |
489 | 112k | case Element::XIINCLUDE: |
490 | 113k | case Element::XIFALLBACK: |
491 | 116k | case Element::XIHREF: |
492 | 116k | output = "xi:include href=\"fuzz.xml\""; |
493 | 117k | case Element::XIPARSE: |
494 | 117k | output = "xi:include parse=\"xml\""; |
495 | 117k | case Element::XIXPOINTER: |
496 | 117k | output = "xi:include xpointer=\"" + removeNonAscii(_prop) + "\""; |
497 | 119k | case Element::XIENCODING: |
498 | 119k | output = "xi:include encoding=\"" + removeNonAscii(_prop) + "\""; |
499 | 120k | case Element::XIACCEPT: |
500 | 120k | output = "xi:include accept=\"" + removeNonAscii(_prop) + "\""; |
501 | 121k | case Element::XIACCEPTLANG: |
502 | 121k | output = "xi:include accept-language=\"" + removeNonAscii(_prop) + "\""; |
503 | 121k | case Element_Id_Element_Id_INT_MIN_SENTINEL_DO_NOT_USE_: |
504 | 121k | case Element_Id_Element_Id_INT_MAX_SENTINEL_DO_NOT_USE_: |
505 | 121k | output = "xi:fuzz xifuzz=\"fuzz\""; |
506 | 121k | } |
507 | 121k | return output; |
508 | 121k | } |
509 | | |
510 | | /// Returns uri string for a given Element_Id type |
511 | | string ProtoConverter::getUri(Element_Id _x) |
512 | 121k | { |
513 | 121k | if (!Element::Id_IsValid(_x)) |
514 | 232 | return s_XInclude; |
515 | | |
516 | 121k | switch (_x) |
517 | 121k | { |
518 | 112k | case Element::XIINCLUDE: |
519 | 113k | case Element::XIFALLBACK: |
520 | 116k | case Element::XIHREF: |
521 | 117k | case Element::XIPARSE: |
522 | 117k | case Element::XIXPOINTER: |
523 | 119k | case Element::XIENCODING: |
524 | 120k | case Element::XIACCEPT: |
525 | 121k | case Element::XIACCEPTLANG: |
526 | 121k | case Element_Id_Element_Id_INT_MIN_SENTINEL_DO_NOT_USE_: |
527 | 121k | case Element_Id_Element_Id_INT_MAX_SENTINEL_DO_NOT_USE_: |
528 | 121k | return s_XInclude; |
529 | 121k | } |
530 | 121k | } |
531 | | |
532 | | void ProtoConverter::visit(Element const& _x) |
533 | 133k | { |
534 | 133k | if (!isValid(_x)) |
535 | 433 | return; |
536 | | |
537 | | // Predefined child node |
538 | 133k | string child = {}; |
539 | | // Predefined uri for child node |
540 | 133k | string pUri = {}; |
541 | | // Element name |
542 | 133k | string name = removeNonAscii(_x.name()); |
543 | | |
544 | 133k | switch (_x.type()) |
545 | 133k | { |
546 | 121k | case Element::PREDEFINED: |
547 | 121k | child = getPredefined(_x.id(), _x.childprop()); |
548 | 121k | pUri = getUri(_x.id()); |
549 | 121k | break; |
550 | 11.6k | case Element::FUZZ: |
551 | 11.6k | case Element_Type_Element_Type_INT_MIN_SENTINEL_DO_NOT_USE_: |
552 | 11.6k | case Element_Type_Element_Type_INT_MAX_SENTINEL_DO_NOT_USE_: |
553 | 11.6k | break; |
554 | 133k | } |
555 | | |
556 | | // <name k1=v1 k2=v2 k3=v3> |
557 | | // <content> |
558 | | // </name> |
559 | | |
560 | | // Start name tag: Must be Ascii? |
561 | 133k | m_output << "<" << name << " "; |
562 | | |
563 | | // Add uri to name tag |
564 | 133k | if (!pUri.empty()) |
565 | 121k | m_output << pUri << " "; |
566 | 133k | for (auto const& prop: _x.kv()) |
567 | 17.2k | visit(prop); |
568 | 133k | m_output << ">\n"; |
569 | | |
570 | | // Add attribute |
571 | 133k | if (!child.empty()) |
572 | 121k | m_output << "<" << child << "/>\n"; |
573 | | |
574 | | // Add content |
575 | 133k | visit(_x.content()); |
576 | | |
577 | | // Close name tag |
578 | 133k | m_output << "</" << name << ">\n"; |
579 | 133k | } |
580 | | |
581 | | void ProtoConverter::visit(ExternalId const& _x) |
582 | 11.3k | { |
583 | 11.3k | if (!isValid(_x)) |
584 | 815 | return; |
585 | | |
586 | 10.5k | switch (_x.type()) |
587 | 10.5k | { |
588 | 8.69k | case ExternalId::SYSTEM: |
589 | 8.69k | m_output << "SYSTEM " << "\"" << removeNonAscii(_x.system()) << "\""; |
590 | 8.69k | break; |
591 | 732 | case ExternalId::PUBLIC: |
592 | 732 | m_output << "PUBLIC " << "\"" << removeNonAscii(_x.pub()) << "\"" |
593 | 732 | << " " << "\"" << removeNonAscii(_x.system()) << "\""; |
594 | 732 | break; |
595 | 1.12k | case ExternalId::FUZZ: |
596 | 1.12k | m_output << "FUZZ " << "\"" << removeNonAscii(_x.pub()) << "\""; |
597 | 1.12k | break; |
598 | 0 | case ExternalId_Type_ExternalId_Type_INT_MIN_SENTINEL_DO_NOT_USE_: |
599 | 0 | case ExternalId_Type_ExternalId_Type_INT_MAX_SENTINEL_DO_NOT_USE_: |
600 | 0 | break; |
601 | 10.5k | } |
602 | 10.5k | } |
603 | | |
604 | | void ProtoConverter::visit(DocTypeDecl const& _x) |
605 | 7.06k | { |
606 | 7.06k | m_output << "<!DOCTYPE " << removeNonAscii(_x.name()) << " "; |
607 | 7.06k | visit(_x.ext()); |
608 | 7.06k | m_output << "["; |
609 | 7.06k | for (auto const& m: _x.mdecl()) |
610 | 15.6k | visit(m); |
611 | 7.06k | m_output << "]"; |
612 | 7.06k | m_output << ">\n"; |
613 | 7.06k | } |
614 | | |
615 | | void ProtoConverter::visit(VersionNum const& _x) |
616 | 7.06k | { |
617 | 7.06k | if (!isValid(_x)) |
618 | 6 | return; |
619 | | |
620 | 7.05k | switch (_x.type()) |
621 | 7.05k | { |
622 | 6.92k | case VersionNum::STANDARD: |
623 | 6.92k | m_output << "\"1.0\""; |
624 | 6.92k | break; |
625 | 133 | case VersionNum::FUZZ: |
626 | 133 | case VersionNum_Type_VersionNum_Type_INT_MIN_SENTINEL_DO_NOT_USE_: |
627 | 133 | case VersionNum_Type_VersionNum_Type_INT_MAX_SENTINEL_DO_NOT_USE_: |
628 | 133 | m_output << "\"" << _x.major() << "." << _x.minor() << "\""; |
629 | 133 | break; |
630 | 7.05k | } |
631 | 7.05k | } |
632 | | |
633 | | void ProtoConverter::visit(Encodings const& _x) |
634 | 7.06k | { |
635 | 7.06k | if (!Encodings::Enc_IsValid(_x.name())) |
636 | 536 | return; |
637 | | |
638 | 6.52k | m_output << " encoding=\""; |
639 | 6.52k | switch (_x.name()) |
640 | 6.52k | { |
641 | 5.76k | case Encodings::BIG5: |
642 | 5.76k | m_output << "BIG5"; |
643 | 5.76k | break; |
644 | 8 | case Encodings::EUCJP: |
645 | 8 | m_output << "EUC-JP"; |
646 | 8 | break; |
647 | 23 | case Encodings::EUCKR: |
648 | 23 | m_output << "EUC-KR"; |
649 | 23 | break; |
650 | 17 | case Encodings::GB18030: |
651 | 17 | m_output << "GB18030"; |
652 | 17 | break; |
653 | 4 | case Encodings::ISO2022JP: |
654 | 4 | m_output << "ISO-2022-JP"; |
655 | 4 | break; |
656 | 125 | case Encodings::ISO2022KR: |
657 | 125 | m_output << "ISO-2022-KR"; |
658 | 125 | break; |
659 | 66 | case Encodings::ISO88591: |
660 | 66 | m_output << "ISO-8859-1"; |
661 | 66 | break; |
662 | 3 | case Encodings::ISO88592: |
663 | 3 | m_output << "ISO-8859-2"; |
664 | 3 | break; |
665 | 6 | case Encodings::ISO88593: |
666 | 6 | m_output << "ISO-8859-3"; |
667 | 6 | break; |
668 | 9 | case Encodings::ISO88594: |
669 | 9 | m_output << "ISO-8859-4"; |
670 | 9 | break; |
671 | 12 | case Encodings::ISO88595: |
672 | 12 | m_output << "ISO-8859-5"; |
673 | 12 | break; |
674 | 3 | case Encodings::ISO88596: |
675 | 3 | m_output << "ISO-8859-6"; |
676 | 3 | break; |
677 | 11 | case Encodings::ISO88597: |
678 | 11 | m_output << "ISO-8859-7"; |
679 | 11 | break; |
680 | 3 | case Encodings::ISO88598: |
681 | 3 | m_output << "ISO-8859-8"; |
682 | 3 | break; |
683 | 12 | case Encodings::ISO88599: |
684 | 12 | m_output << "ISO-8859-9"; |
685 | 12 | break; |
686 | 81 | case Encodings::SHIFTJIS: |
687 | 81 | m_output << "SHIFT_JIS"; |
688 | 81 | break; |
689 | 8 | case Encodings::TIS620: |
690 | 8 | m_output << "TIS-620"; |
691 | 8 | break; |
692 | 76 | case Encodings::USASCII: |
693 | 76 | m_output << "US-ASCII"; |
694 | 76 | break; |
695 | 67 | case Encodings::UTF8: |
696 | 67 | m_output << "UTF-8"; |
697 | 67 | break; |
698 | 43 | case Encodings::UTF16: |
699 | 43 | m_output << "UTF-16"; |
700 | 43 | break; |
701 | 24 | case Encodings::UTF16BE: |
702 | 24 | m_output << "UTF-16BE"; |
703 | 24 | break; |
704 | 7 | case Encodings::UTF16LE: |
705 | 7 | m_output << "UTF-16LE"; |
706 | 7 | break; |
707 | 26 | case Encodings::WINDOWS31J: |
708 | 26 | m_output << "WINDOWS-31J"; |
709 | 26 | break; |
710 | 7 | case Encodings::WINDOWS1255: |
711 | 7 | m_output << "WINDOWS-1255"; |
712 | 7 | break; |
713 | 6 | case Encodings::WINDOWS1256: |
714 | 6 | m_output << "WINDOWS-1256"; |
715 | 6 | break; |
716 | 121 | case Encodings::FUZZ: |
717 | 121 | m_output << removeNonAscii(_x.fuzz()); |
718 | 121 | break; |
719 | 0 | case Encodings_Enc_Encodings_Enc_INT_MIN_SENTINEL_DO_NOT_USE_: |
720 | 0 | case Encodings_Enc_Encodings_Enc_INT_MAX_SENTINEL_DO_NOT_USE_: |
721 | 0 | break; |
722 | 6.52k | } |
723 | 6.52k | m_output << "\""; |
724 | 6.52k | } |
725 | | |
726 | | void ProtoConverter::visit(XmlDeclaration const& _x) |
727 | 7.06k | { |
728 | 7.06k | m_output << R"(<?xml version=)"; |
729 | 7.06k | visit(_x.ver()); |
730 | 7.06k | visit(_x.enc()); |
731 | 7.06k | switch (_x.standalone()) |
732 | 7.06k | { |
733 | 6.71k | case XmlDeclaration::YES: |
734 | 6.71k | m_output << " standalone=\'yes\'"; |
735 | 6.71k | break; |
736 | 287 | case XmlDeclaration::NO: |
737 | 287 | m_output << " standalone=\'no\'"; |
738 | 287 | break; |
739 | 0 | case XmlDeclaration_Standalone_XmlDeclaration_Standalone_INT_MIN_SENTINEL_DO_NOT_USE_: |
740 | 0 | case XmlDeclaration_Standalone_XmlDeclaration_Standalone_INT_MAX_SENTINEL_DO_NOT_USE_: |
741 | 63 | default: |
742 | 63 | break; |
743 | 7.06k | } |
744 | 7.06k | m_output << "?>\n"; |
745 | 7.06k | } |
746 | | |
747 | | void ProtoConverter::visit(XmlDocument const& _x) |
748 | 7.06k | { |
749 | 7.06k | visit(_x.p()); |
750 | 7.06k | for (auto const& element: _x.e()) |
751 | 131k | visit(element); |
752 | 7.06k | } |
753 | | |
754 | | string ProtoConverter::protoToString(XmlDocument const& _x) |
755 | 7.06k | { |
756 | 7.06k | visit(_x); |
757 | 7.06k | return m_output.str(); |
758 | 7.06k | } |