/src/poco/dependencies/expat/src/xmltok_impl.c
Line | Count | Source |
1 | | /* This file is included (from xmltok.c, 1-3 times depending on XML_MIN_SIZE)! |
2 | | __ __ _ |
3 | | ___\ \/ /_ __ __ _| |_ |
4 | | / _ \\ /| '_ \ / _` | __| |
5 | | | __// \| |_) | (_| | |_ |
6 | | \___/_/\_\ .__/ \__,_|\__| |
7 | | |_| XML parser |
8 | | |
9 | | Copyright (c) 1997-2000 Thai Open Source Software Center Ltd |
10 | | Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net> |
11 | | Copyright (c) 2002 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> |
12 | | Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net> |
13 | | Copyright (c) 2016-2022 Sebastian Pipping <sebastian@pipping.org> |
14 | | Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk> |
15 | | Copyright (c) 2018 Benjamin Peterson <benjamin@python.org> |
16 | | Copyright (c) 2018 Anton Maklakov <antmak.pub@gmail.com> |
17 | | Copyright (c) 2019 David Loffredo <loffredo@steptools.com> |
18 | | Copyright (c) 2020 Boris Kolpackov <boris@codesynthesis.com> |
19 | | Copyright (c) 2022 Martin Ettl <ettl.martin78@googlemail.com> |
20 | | Licensed under the MIT license: |
21 | | |
22 | | Permission is hereby granted, free of charge, to any person obtaining |
23 | | a copy of this software and associated documentation files (the |
24 | | "Software"), to deal in the Software without restriction, including |
25 | | without limitation the rights to use, copy, modify, merge, publish, |
26 | | distribute, sublicense, and/or sell copies of the Software, and to permit |
27 | | persons to whom the Software is furnished to do so, subject to the |
28 | | following conditions: |
29 | | |
30 | | The above copyright notice and this permission notice shall be included |
31 | | in all copies or substantial portions of the Software. |
32 | | |
33 | | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
34 | | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
35 | | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN |
36 | | NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, |
37 | | DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
38 | | OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
39 | | USE OR OTHER DEALINGS IN THE SOFTWARE. |
40 | | */ |
41 | | |
42 | | #ifdef XML_TOK_IMPL_C |
43 | | |
44 | | # ifndef IS_INVALID_CHAR // i.e. for UTF-16 and XML_MIN_SIZE not defined |
45 | 16.6M | # define IS_INVALID_CHAR(enc, ptr, n) (0) |
46 | | # endif |
47 | | |
48 | | # define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \ |
49 | 108M | case BT_LEAD##n: \ |
50 | 108M | if (end - ptr < n) \ |
51 | 108M | return XML_TOK_PARTIAL_CHAR; \ |
52 | 108M | if (IS_INVALID_CHAR(enc, ptr, n)) { \ |
53 | 686 | *(nextTokPtr) = (ptr); \ |
54 | 686 | return XML_TOK_INVALID; \ |
55 | 686 | } \ |
56 | 108M | ptr += n; \ |
57 | 108M | break; |
58 | | |
59 | | # define INVALID_CASES(ptr, nextTokPtr) \ |
60 | 71.5M | INVALID_LEAD_CASE(2, ptr, nextTokPtr) \ |
61 | 71.5M | INVALID_LEAD_CASE(3, ptr, nextTokPtr) \ |
62 | 21.7M | INVALID_LEAD_CASE(4, ptr, nextTokPtr) \ |
63 | 15.2M | case BT_NONXML: \ |
64 | 3.27k | case BT_MALFORM: \ |
65 | 4.28k | case BT_TRAIL: \ |
66 | 4.28k | *(nextTokPtr) = (ptr); \ |
67 | 4.28k | return XML_TOK_INVALID; |
68 | | |
69 | | # define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \ |
70 | 12.5M | case BT_LEAD##n: \ |
71 | 12.5M | if (end - ptr < n) \ |
72 | 12.5M | return XML_TOK_PARTIAL_CHAR; \ |
73 | 12.5M | if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NAME_CHAR(enc, ptr, n)) { \ |
74 | 1.81k | *nextTokPtr = ptr; \ |
75 | 1.81k | return XML_TOK_INVALID; \ |
76 | 1.81k | } \ |
77 | 12.5M | ptr += n; \ |
78 | 12.5M | break; |
79 | | |
80 | | # define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \ |
81 | 380M | case BT_NONASCII: \ |
82 | 380M | if (! IS_NAME_CHAR_MINBPC(enc, ptr)) { \ |
83 | 1.84k | *nextTokPtr = ptr; \ |
84 | 1.84k | return XML_TOK_INVALID; \ |
85 | 1.84k | } \ |
86 | 380M | /* fall through */ \ |
87 | 380M | case BT_NMSTRT: \ |
88 | 350M | case BT_HEX: \ |
89 | 377M | case BT_DIGIT: \ |
90 | 379M | case BT_NAME: \ |
91 | 380M | case BT_MINUS: \ |
92 | 380M | ptr += MINBPC(enc); \ |
93 | 380M | break; \ |
94 | 380M | CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \ |
95 | 11.2M | CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \ |
96 | 1.29M | CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr) |
97 | | |
98 | | # define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \ |
99 | 471k | case BT_LEAD##n: \ |
100 | 471k | if ((end) - (ptr) < (n)) \ |
101 | 468k | return XML_TOK_PARTIAL_CHAR; \ |
102 | 469k | if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NMSTRT_CHAR(enc, ptr, n)) { \ |
103 | 1.46k | *nextTokPtr = ptr; \ |
104 | 1.46k | return XML_TOK_INVALID; \ |
105 | 1.46k | } \ |
106 | 469k | ptr += n; \ |
107 | 468k | break; |
108 | | |
109 | | # define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \ |
110 | 35.7M | case BT_NONASCII: \ |
111 | 35.7M | if (! IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \ |
112 | 797 | *nextTokPtr = ptr; \ |
113 | 797 | return XML_TOK_INVALID; \ |
114 | 797 | } \ |
115 | 35.7M | /* fall through */ \ |
116 | 35.7M | case BT_NMSTRT: \ |
117 | 35.7M | case BT_HEX: \ |
118 | 35.7M | ptr += MINBPC(enc); \ |
119 | 35.7M | break; \ |
120 | 35.7M | CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \ |
121 | 237k | CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \ |
122 | 230k | CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr) |
123 | | |
124 | | # ifndef PREFIX |
125 | | # define PREFIX(ident) ident |
126 | | # endif |
127 | | |
128 | | # define HAS_CHARS(enc, ptr, end, count) \ |
129 | 9.06G | ((end) - (ptr) >= ((count) * MINBPC(enc))) |
130 | | |
131 | 152M | # define HAS_CHAR(enc, ptr, end) HAS_CHARS(enc, ptr, end, 1) |
132 | | |
133 | | # define REQUIRE_CHARS(enc, ptr, end, count) \ |
134 | 321M | { \ |
135 | 321M | if (! HAS_CHARS(enc, ptr, end, count)) { \ |
136 | 17.6k | return XML_TOK_PARTIAL; \ |
137 | 17.6k | } \ |
138 | 321M | } |
139 | | |
140 | 320M | # define REQUIRE_CHAR(enc, ptr, end) REQUIRE_CHARS(enc, ptr, end, 1) |
141 | | |
142 | | /* ptr points to character following "<!-" */ |
143 | | |
144 | | static int PTRCALL |
145 | | PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end, |
146 | 1.17M | const char **nextTokPtr) { |
147 | 1.17M | if (HAS_CHAR(enc, ptr, end)) { |
148 | 1.17M | if (! CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { |
149 | 144 | *nextTokPtr = ptr; |
150 | 144 | return XML_TOK_INVALID; |
151 | 144 | } |
152 | 1.17M | ptr += MINBPC(enc); |
153 | 101M | while (HAS_CHAR(enc, ptr, end)) { |
154 | 101M | switch (BYTE_TYPE(enc, ptr)) { |
155 | 139M | INVALID_CASES(ptr, nextTokPtr) |
156 | 1.28M | case BT_MINUS: |
157 | 1.28M | ptr += MINBPC(enc); |
158 | 1.28M | REQUIRE_CHAR(enc, ptr, end); |
159 | 1.28M | if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { |
160 | 1.16M | ptr += MINBPC(enc); |
161 | 1.16M | REQUIRE_CHAR(enc, ptr, end); |
162 | 1.16M | if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
163 | 174 | *nextTokPtr = ptr; |
164 | 174 | return XML_TOK_INVALID; |
165 | 174 | } |
166 | 1.16M | *nextTokPtr = ptr + MINBPC(enc); |
167 | 1.16M | return XML_TOK_COMMENT; |
168 | 1.16M | } |
169 | 118k | break; |
170 | 30.5M | default: |
171 | 30.5M | ptr += MINBPC(enc); |
172 | 30.5M | break; |
173 | 101M | } |
174 | 101M | } |
175 | 1.17M | } |
176 | 5.19k | return XML_TOK_PARTIAL; |
177 | 1.17M | } xmltok.c:normal_scanComment Line | Count | Source | 146 | 1.13M | const char **nextTokPtr) { | 147 | 1.13M | if (HAS_CHAR(enc, ptr, end)) { | 148 | 1.13M | if (! CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { | 149 | 59 | *nextTokPtr = ptr; | 150 | 59 | return XML_TOK_INVALID; | 151 | 59 | } | 152 | 1.13M | ptr += MINBPC(enc); | 153 | 75.2M | while (HAS_CHAR(enc, ptr, end)) { | 154 | 75.2M | switch (BYTE_TYPE(enc, ptr)) { | 155 | 133M | INVALID_CASES(ptr, nextTokPtr) | 156 | 1.16M | case BT_MINUS: | 157 | 1.16M | ptr += MINBPC(enc); | 158 | 1.16M | REQUIRE_CHAR(enc, ptr, end); | 159 | 1.16M | if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { | 160 | 1.13M | ptr += MINBPC(enc); | 161 | 1.13M | REQUIRE_CHAR(enc, ptr, end); | 162 | 1.13M | if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) { | 163 | 104 | *nextTokPtr = ptr; | 164 | 104 | return XML_TOK_INVALID; | 165 | 104 | } | 166 | 1.13M | *nextTokPtr = ptr + MINBPC(enc); | 167 | 1.13M | return XML_TOK_COMMENT; | 168 | 1.13M | } | 169 | 36.9k | break; | 170 | 7.38M | default: | 171 | 7.38M | ptr += MINBPC(enc); | 172 | 7.38M | break; | 173 | 75.2M | } | 174 | 75.2M | } | 175 | 1.13M | } | 176 | 1.56k | return XML_TOK_PARTIAL; | 177 | 1.13M | } |
xmltok.c:little2_scanComment Line | Count | Source | 146 | 37.2k | const char **nextTokPtr) { | 147 | 37.2k | if (HAS_CHAR(enc, ptr, end)) { | 148 | 37.1k | if (! CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { | 149 | 35 | *nextTokPtr = ptr; | 150 | 35 | return XML_TOK_INVALID; | 151 | 35 | } | 152 | 37.1k | ptr += MINBPC(enc); | 153 | 13.5M | while (HAS_CHAR(enc, ptr, end)) { | 154 | 13.5M | switch (BYTE_TYPE(enc, ptr)) { | 155 | 2.91M | INVALID_CASES(ptr, nextTokPtr) | 156 | 110k | case BT_MINUS: | 157 | 110k | ptr += MINBPC(enc); | 158 | 110k | REQUIRE_CHAR(enc, ptr, end); | 159 | 110k | if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { | 160 | 33.6k | ptr += MINBPC(enc); | 161 | 33.6k | REQUIRE_CHAR(enc, ptr, end); | 162 | 33.6k | if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) { | 163 | 31 | *nextTokPtr = ptr; | 164 | 31 | return XML_TOK_INVALID; | 165 | 31 | } | 166 | 33.6k | *nextTokPtr = ptr + MINBPC(enc); | 167 | 33.6k | return XML_TOK_COMMENT; | 168 | 33.6k | } | 169 | 76.7k | break; | 170 | 11.9M | default: | 171 | 11.9M | ptr += MINBPC(enc); | 172 | 11.9M | break; | 173 | 13.5M | } | 174 | 13.5M | } | 175 | 37.1k | } | 176 | 2.78k | return XML_TOK_PARTIAL; | 177 | 37.2k | } |
xmltok.c:big2_scanComment Line | Count | Source | 146 | 2.93k | const char **nextTokPtr) { | 147 | 2.93k | if (HAS_CHAR(enc, ptr, end)) { | 148 | 2.88k | if (! CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { | 149 | 50 | *nextTokPtr = ptr; | 150 | 50 | return XML_TOK_INVALID; | 151 | 50 | } | 152 | 2.83k | ptr += MINBPC(enc); | 153 | 12.6M | while (HAS_CHAR(enc, ptr, end)) { | 154 | 12.6M | switch (BYTE_TYPE(enc, ptr)) { | 155 | 2.79M | INVALID_CASES(ptr, nextTokPtr) | 156 | 6.02k | case BT_MINUS: | 157 | 6.02k | ptr += MINBPC(enc); | 158 | 6.02k | REQUIRE_CHAR(enc, ptr, end); | 159 | 5.96k | if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { | 160 | 1.61k | ptr += MINBPC(enc); | 161 | 1.61k | REQUIRE_CHAR(enc, ptr, end); | 162 | 1.58k | if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) { | 163 | 39 | *nextTokPtr = ptr; | 164 | 39 | return XML_TOK_INVALID; | 165 | 39 | } | 166 | 1.54k | *nextTokPtr = ptr + MINBPC(enc); | 167 | 1.54k | return XML_TOK_COMMENT; | 168 | 1.58k | } | 169 | 4.34k | break; | 170 | 11.2M | default: | 171 | 11.2M | ptr += MINBPC(enc); | 172 | 11.2M | break; | 173 | 12.6M | } | 174 | 12.6M | } | 175 | 2.83k | } | 176 | 852 | return XML_TOK_PARTIAL; | 177 | 2.93k | } |
|
178 | | |
179 | | /* ptr points to character following "<!" */ |
180 | | |
181 | | static int PTRCALL |
182 | | PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end, |
183 | 823k | const char **nextTokPtr) { |
184 | 823k | REQUIRE_CHAR(enc, ptr, end); |
185 | 823k | switch (BYTE_TYPE(enc, ptr)) { |
186 | 459k | case BT_MINUS: |
187 | 459k | return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
188 | 21 | case BT_LSQB: |
189 | 21 | *nextTokPtr = ptr + MINBPC(enc); |
190 | 21 | return XML_TOK_COND_SECT_OPEN; |
191 | 106k | case BT_NMSTRT: |
192 | 363k | case BT_HEX: |
193 | 363k | ptr += MINBPC(enc); |
194 | 363k | break; |
195 | 179 | default: |
196 | 179 | *nextTokPtr = ptr; |
197 | 179 | return XML_TOK_INVALID; |
198 | 823k | } |
199 | 2.98M | while (HAS_CHAR(enc, ptr, end)) { |
200 | 2.98M | switch (BYTE_TYPE(enc, ptr)) { |
201 | 238 | case BT_PERCNT: |
202 | 238 | REQUIRE_CHARS(enc, ptr, end, 2); |
203 | | /* don't allow <!ENTITY% foo "whatever"> */ |
204 | 207 | switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) { |
205 | 21 | case BT_S: |
206 | 39 | case BT_CR: |
207 | 57 | case BT_LF: |
208 | 75 | case BT_PERCNT: |
209 | 75 | *nextTokPtr = ptr; |
210 | 75 | return XML_TOK_INVALID; |
211 | 207 | } |
212 | | /* fall through */ |
213 | 172k | case BT_S: |
214 | 220k | case BT_CR: |
215 | 361k | case BT_LF: |
216 | 361k | *nextTokPtr = ptr; |
217 | 361k | return XML_TOK_DECL_OPEN; |
218 | 1.87M | case BT_NMSTRT: |
219 | 2.62M | case BT_HEX: |
220 | 2.62M | ptr += MINBPC(enc); |
221 | 2.62M | break; |
222 | 309 | default: |
223 | 309 | *nextTokPtr = ptr; |
224 | 309 | return XML_TOK_INVALID; |
225 | 2.98M | } |
226 | 2.98M | } |
227 | 1.28k | return XML_TOK_PARTIAL; |
228 | 363k | } Line | Count | Source | 183 | 685k | const char **nextTokPtr) { | 184 | 685k | REQUIRE_CHAR(enc, ptr, end); | 185 | 685k | switch (BYTE_TYPE(enc, ptr)) { | 186 | 455k | case BT_MINUS: | 187 | 455k | return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 188 | 9 | case BT_LSQB: | 189 | 9 | *nextTokPtr = ptr + MINBPC(enc); | 190 | 9 | return XML_TOK_COND_SECT_OPEN; | 191 | 16.6k | case BT_NMSTRT: | 192 | 229k | case BT_HEX: | 193 | 229k | ptr += MINBPC(enc); | 194 | 229k | break; | 195 | 82 | default: | 196 | 82 | *nextTokPtr = ptr; | 197 | 82 | return XML_TOK_INVALID; | 198 | 685k | } | 199 | 1.98M | while (HAS_CHAR(enc, ptr, end)) { | 200 | 1.98M | switch (BYTE_TYPE(enc, ptr)) { | 201 | 115 | case BT_PERCNT: | 202 | 115 | REQUIRE_CHARS(enc, ptr, end, 2); | 203 | | /* don't allow <!ENTITY% foo "whatever"> */ | 204 | 99 | switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) { | 205 | 9 | case BT_S: | 206 | 15 | case BT_CR: | 207 | 21 | case BT_LF: | 208 | 27 | case BT_PERCNT: | 209 | 27 | *nextTokPtr = ptr; | 210 | 27 | return XML_TOK_INVALID; | 211 | 99 | } | 212 | | /* fall through */ | 213 | 130k | case BT_S: | 214 | 163k | case BT_CR: | 215 | 228k | case BT_LF: | 216 | 228k | *nextTokPtr = ptr; | 217 | 228k | return XML_TOK_DECL_OPEN; | 218 | 1.14M | case BT_NMSTRT: | 219 | 1.75M | case BT_HEX: | 220 | 1.75M | ptr += MINBPC(enc); | 221 | 1.75M | break; | 222 | 139 | default: | 223 | 139 | *nextTokPtr = ptr; | 224 | 139 | return XML_TOK_INVALID; | 225 | 1.98M | } | 226 | 1.98M | } | 227 | 547 | return XML_TOK_PARTIAL; | 228 | 229k | } |
xmltok.c:little2_scanDecl Line | Count | Source | 183 | 33.2k | const char **nextTokPtr) { | 184 | 33.2k | REQUIRE_CHAR(enc, ptr, end); | 185 | 33.2k | switch (BYTE_TYPE(enc, ptr)) { | 186 | 2.49k | case BT_MINUS: | 187 | 2.49k | return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 188 | 6 | case BT_LSQB: | 189 | 6 | *nextTokPtr = ptr + MINBPC(enc); | 190 | 6 | return XML_TOK_COND_SECT_OPEN; | 191 | 5.37k | case BT_NMSTRT: | 192 | 30.6k | case BT_HEX: | 193 | 30.6k | ptr += MINBPC(enc); | 194 | 30.6k | break; | 195 | 60 | default: | 196 | 60 | *nextTokPtr = ptr; | 197 | 60 | return XML_TOK_INVALID; | 198 | 33.2k | } | 199 | 209k | while (HAS_CHAR(enc, ptr, end)) { | 200 | 209k | switch (BYTE_TYPE(enc, ptr)) { | 201 | 71 | case BT_PERCNT: | 202 | 71 | REQUIRE_CHARS(enc, ptr, end, 2); | 203 | | /* don't allow <!ENTITY% foo "whatever"> */ | 204 | 62 | switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) { | 205 | 6 | case BT_S: | 206 | 12 | case BT_CR: | 207 | 18 | case BT_LF: | 208 | 24 | case BT_PERCNT: | 209 | 24 | *nextTokPtr = ptr; | 210 | 24 | return XML_TOK_INVALID; | 211 | 62 | } | 212 | | /* fall through */ | 213 | 17.9k | case BT_S: | 214 | 25.6k | case BT_CR: | 215 | 30.3k | case BT_LF: | 216 | 30.3k | *nextTokPtr = ptr; | 217 | 30.3k | return XML_TOK_DECL_OPEN; | 218 | 142k | case BT_NMSTRT: | 219 | 179k | case BT_HEX: | 220 | 179k | ptr += MINBPC(enc); | 221 | 179k | break; | 222 | 67 | default: | 223 | 67 | *nextTokPtr = ptr; | 224 | 67 | return XML_TOK_INVALID; | 225 | 209k | } | 226 | 209k | } | 227 | 187 | return XML_TOK_PARTIAL; | 228 | 30.6k | } |
Line | Count | Source | 183 | 104k | const char **nextTokPtr) { | 184 | 104k | REQUIRE_CHAR(enc, ptr, end); | 185 | 104k | switch (BYTE_TYPE(enc, ptr)) { | 186 | 1.51k | case BT_MINUS: | 187 | 1.51k | return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 188 | 6 | case BT_LSQB: | 189 | 6 | *nextTokPtr = ptr + MINBPC(enc); | 190 | 6 | return XML_TOK_COND_SECT_OPEN; | 191 | 84.5k | case BT_NMSTRT: | 192 | 102k | case BT_HEX: | 193 | 102k | ptr += MINBPC(enc); | 194 | 102k | break; | 195 | 37 | default: | 196 | 37 | *nextTokPtr = ptr; | 197 | 37 | return XML_TOK_INVALID; | 198 | 104k | } | 199 | 793k | while (HAS_CHAR(enc, ptr, end)) { | 200 | 793k | switch (BYTE_TYPE(enc, ptr)) { | 201 | 52 | case BT_PERCNT: | 202 | 52 | REQUIRE_CHARS(enc, ptr, end, 2); | 203 | | /* don't allow <!ENTITY% foo "whatever"> */ | 204 | 46 | switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) { | 205 | 6 | case BT_S: | 206 | 12 | case BT_CR: | 207 | 18 | case BT_LF: | 208 | 24 | case BT_PERCNT: | 209 | 24 | *nextTokPtr = ptr; | 210 | 24 | return XML_TOK_INVALID; | 211 | 46 | } | 212 | | /* fall through */ | 213 | 24.1k | case BT_S: | 214 | 31.0k | case BT_CR: | 215 | 102k | case BT_LF: | 216 | 102k | *nextTokPtr = ptr; | 217 | 102k | return XML_TOK_DECL_OPEN; | 218 | 588k | case BT_NMSTRT: | 219 | 690k | case BT_HEX: | 220 | 690k | ptr += MINBPC(enc); | 221 | 690k | break; | 222 | 103 | default: | 223 | 103 | *nextTokPtr = ptr; | 224 | 103 | return XML_TOK_INVALID; | 225 | 793k | } | 226 | 793k | } | 227 | 546 | return XML_TOK_PARTIAL; | 228 | 102k | } |
|
229 | | |
230 | | static int PTRCALL |
231 | | PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, |
232 | 510k | int *tokPtr) { |
233 | 510k | int upper = 0; |
234 | 510k | UNUSED_P(enc); |
235 | 510k | *tokPtr = XML_TOK_PI; |
236 | 510k | if (end - ptr != MINBPC(enc) * 3) |
237 | 342k | return 1; |
238 | 168k | switch (BYTE_TO_ASCII(enc, ptr)) { |
239 | 26.7k | case ASCII_x: |
240 | 26.7k | break; |
241 | 11.2k | case ASCII_X: |
242 | 11.2k | upper = 1; |
243 | 11.2k | break; |
244 | 130k | default: |
245 | 130k | return 1; |
246 | 168k | } |
247 | 38.0k | ptr += MINBPC(enc); |
248 | 38.0k | switch (BYTE_TO_ASCII(enc, ptr)) { |
249 | 18.2k | case ASCII_m: |
250 | 18.2k | break; |
251 | 10.0k | case ASCII_M: |
252 | 10.0k | upper = 1; |
253 | 10.0k | break; |
254 | 9.77k | default: |
255 | 9.77k | return 1; |
256 | 38.0k | } |
257 | 28.2k | ptr += MINBPC(enc); |
258 | 28.2k | switch (BYTE_TO_ASCII(enc, ptr)) { |
259 | 9.23k | case ASCII_l: |
260 | 9.23k | break; |
261 | 36 | case ASCII_L: |
262 | 36 | upper = 1; |
263 | 36 | break; |
264 | 19.0k | default: |
265 | 19.0k | return 1; |
266 | 28.2k | } |
267 | 9.27k | if (upper) |
268 | 75 | return 0; |
269 | 9.19k | *tokPtr = XML_TOK_XML_DECL; |
270 | 9.19k | return 1; |
271 | 9.27k | } xmltok.c:normal_checkPiTarget Line | Count | Source | 232 | 123k | int *tokPtr) { | 233 | 123k | int upper = 0; | 234 | 123k | UNUSED_P(enc); | 235 | 123k | *tokPtr = XML_TOK_PI; | 236 | 123k | if (end - ptr != MINBPC(enc) * 3) | 237 | 70.7k | return 1; | 238 | 52.6k | switch (BYTE_TO_ASCII(enc, ptr)) { | 239 | 20.0k | case ASCII_x: | 240 | 20.0k | break; | 241 | 2.86k | case ASCII_X: | 242 | 2.86k | upper = 1; | 243 | 2.86k | break; | 244 | 29.7k | default: | 245 | 29.7k | return 1; | 246 | 52.6k | } | 247 | 22.9k | ptr += MINBPC(enc); | 248 | 22.9k | switch (BYTE_TO_ASCII(enc, ptr)) { | 249 | 14.1k | case ASCII_m: | 250 | 14.1k | break; | 251 | 4.67k | case ASCII_M: | 252 | 4.67k | upper = 1; | 253 | 4.67k | break; | 254 | 4.11k | default: | 255 | 4.11k | return 1; | 256 | 22.9k | } | 257 | 18.8k | ptr += MINBPC(enc); | 258 | 18.8k | switch (BYTE_TO_ASCII(enc, ptr)) { | 259 | 8.97k | case ASCII_l: | 260 | 8.97k | break; | 261 | 12 | case ASCII_L: | 262 | 12 | upper = 1; | 263 | 12 | break; | 264 | 9.83k | default: | 265 | 9.83k | return 1; | 266 | 18.8k | } | 267 | 8.98k | if (upper) | 268 | 27 | return 0; | 269 | 8.95k | *tokPtr = XML_TOK_XML_DECL; | 270 | 8.95k | return 1; | 271 | 8.98k | } |
xmltok.c:little2_checkPiTarget Line | Count | Source | 232 | 130k | int *tokPtr) { | 233 | 130k | int upper = 0; | 234 | 130k | UNUSED_P(enc); | 235 | 130k | *tokPtr = XML_TOK_PI; | 236 | 130k | if (end - ptr != MINBPC(enc) * 3) | 237 | 115k | return 1; | 238 | 14.8k | switch (BYTE_TO_ASCII(enc, ptr)) { | 239 | 4.16k | case ASCII_x: | 240 | 4.16k | break; | 241 | 5.34k | case ASCII_X: | 242 | 5.34k | upper = 1; | 243 | 5.34k | break; | 244 | 5.33k | default: | 245 | 5.33k | return 1; | 246 | 14.8k | } | 247 | 9.51k | ptr += MINBPC(enc); | 248 | 9.51k | switch (BYTE_TO_ASCII(enc, ptr)) { | 249 | 2.60k | case ASCII_m: | 250 | 2.60k | break; | 251 | 2.82k | case ASCII_M: | 252 | 2.82k | upper = 1; | 253 | 2.82k | break; | 254 | 4.07k | default: | 255 | 4.07k | return 1; | 256 | 9.51k | } | 257 | 5.43k | ptr += MINBPC(enc); | 258 | 5.43k | switch (BYTE_TO_ASCII(enc, ptr)) { | 259 | 90 | case ASCII_l: | 260 | 90 | break; | 261 | 12 | case ASCII_L: | 262 | 12 | upper = 1; | 263 | 12 | break; | 264 | 5.33k | default: | 265 | 5.33k | return 1; | 266 | 5.43k | } | 267 | 102 | if (upper) | 268 | 24 | return 0; | 269 | 78 | *tokPtr = XML_TOK_XML_DECL; | 270 | 78 | return 1; | 271 | 102 | } |
xmltok.c:big2_checkPiTarget Line | Count | Source | 232 | 256k | int *tokPtr) { | 233 | 256k | int upper = 0; | 234 | 256k | UNUSED_P(enc); | 235 | 256k | *tokPtr = XML_TOK_PI; | 236 | 256k | if (end - ptr != MINBPC(enc) * 3) | 237 | 155k | return 1; | 238 | 101k | switch (BYTE_TO_ASCII(enc, ptr)) { | 239 | 2.55k | case ASCII_x: | 240 | 2.55k | break; | 241 | 3.07k | case ASCII_X: | 242 | 3.07k | upper = 1; | 243 | 3.07k | break; | 244 | 95.4k | default: | 245 | 95.4k | return 1; | 246 | 101k | } | 247 | 5.63k | ptr += MINBPC(enc); | 248 | 5.63k | switch (BYTE_TO_ASCII(enc, ptr)) { | 249 | 1.48k | case ASCII_m: | 250 | 1.48k | break; | 251 | 2.55k | case ASCII_M: | 252 | 2.55k | upper = 1; | 253 | 2.55k | break; | 254 | 1.59k | default: | 255 | 1.59k | return 1; | 256 | 5.63k | } | 257 | 4.04k | ptr += MINBPC(enc); | 258 | 4.04k | switch (BYTE_TO_ASCII(enc, ptr)) { | 259 | 175 | case ASCII_l: | 260 | 175 | break; | 261 | 12 | case ASCII_L: | 262 | 12 | upper = 1; | 263 | 12 | break; | 264 | 3.85k | default: | 265 | 3.85k | return 1; | 266 | 4.04k | } | 267 | 187 | if (upper) | 268 | 24 | return 0; | 269 | 163 | *tokPtr = XML_TOK_XML_DECL; | 270 | 163 | return 1; | 271 | 187 | } |
|
272 | | |
273 | | /* ptr points to character following "<?" */ |
274 | | |
275 | | static int PTRCALL |
276 | | PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end, |
277 | 514k | const char **nextTokPtr) { |
278 | 514k | int tok; |
279 | 514k | const char *target = ptr; |
280 | 514k | REQUIRE_CHAR(enc, ptr, end); |
281 | 513k | switch (BYTE_TYPE(enc, ptr)) { |
282 | 563k | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
283 | 272 | default: |
284 | 272 | *nextTokPtr = ptr; |
285 | 272 | return XML_TOK_INVALID; |
286 | 513k | } |
287 | 18.0M | while (HAS_CHAR(enc, ptr, end)) { |
288 | 18.0M | switch (BYTE_TYPE(enc, ptr)) { |
289 | 66.7M | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
290 | 44.5k | case BT_S: |
291 | 222k | case BT_CR: |
292 | 433k | case BT_LF: |
293 | 433k | if (! PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { |
294 | 39 | *nextTokPtr = ptr; |
295 | 39 | return XML_TOK_INVALID; |
296 | 39 | } |
297 | 433k | ptr += MINBPC(enc); |
298 | 125M | while (HAS_CHAR(enc, ptr, end)) { |
299 | 125M | switch (BYTE_TYPE(enc, ptr)) { |
300 | 33.6M | INVALID_CASES(ptr, nextTokPtr) |
301 | 737k | case BT_QUEST: |
302 | 737k | ptr += MINBPC(enc); |
303 | 737k | REQUIRE_CHAR(enc, ptr, end); |
304 | 736k | if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
305 | 422k | *nextTokPtr = ptr + MINBPC(enc); |
306 | 422k | return tok; |
307 | 422k | } |
308 | 314k | break; |
309 | 107M | default: |
310 | 107M | ptr += MINBPC(enc); |
311 | 107M | break; |
312 | 125M | } |
313 | 125M | } |
314 | 9.08k | return XML_TOK_PARTIAL; |
315 | 77.3k | case BT_QUEST: |
316 | 77.3k | if (! PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { |
317 | 36 | *nextTokPtr = ptr; |
318 | 36 | return XML_TOK_INVALID; |
319 | 36 | } |
320 | 77.2k | ptr += MINBPC(enc); |
321 | 77.2k | REQUIRE_CHAR(enc, ptr, end); |
322 | 76.9k | if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
323 | 76.6k | *nextTokPtr = ptr + MINBPC(enc); |
324 | 76.6k | return tok; |
325 | 76.6k | } |
326 | | /* fall through */ |
327 | 604 | default: |
328 | 604 | *nextTokPtr = ptr; |
329 | 604 | return XML_TOK_INVALID; |
330 | 18.0M | } |
331 | 18.0M | } |
332 | 1.45k | return XML_TOK_PARTIAL; |
333 | 513k | } Line | Count | Source | 277 | 124k | const char **nextTokPtr) { | 278 | 124k | int tok; | 279 | 124k | const char *target = ptr; | 280 | 124k | REQUIRE_CHAR(enc, ptr, end); | 281 | 124k | switch (BYTE_TYPE(enc, ptr)) { | 282 | 194k | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) | 283 | 63 | default: | 284 | 63 | *nextTokPtr = ptr; | 285 | 63 | return XML_TOK_INVALID; | 286 | 124k | } | 287 | 15.9M | while (HAS_CHAR(enc, ptr, end)) { | 288 | 15.9M | switch (BYTE_TYPE(enc, ptr)) { | 289 | 58.6M | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) | 290 | 23.1k | case BT_S: | 291 | 26.3k | case BT_CR: | 292 | 98.1k | case BT_LF: | 293 | 98.1k | if (! PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { | 294 | 15 | *nextTokPtr = ptr; | 295 | 15 | return XML_TOK_INVALID; | 296 | 15 | } | 297 | 98.1k | ptr += MINBPC(enc); | 298 | 69.8M | while (HAS_CHAR(enc, ptr, end)) { | 299 | 69.8M | switch (BYTE_TYPE(enc, ptr)) { | 300 | 29.3M | INVALID_CASES(ptr, nextTokPtr) | 301 | 277k | case BT_QUEST: | 302 | 277k | ptr += MINBPC(enc); | 303 | 277k | REQUIRE_CHAR(enc, ptr, end); | 304 | 277k | if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { | 305 | 96.6k | *nextTokPtr = ptr + MINBPC(enc); | 306 | 96.6k | return tok; | 307 | 96.6k | } | 308 | 180k | break; | 309 | 54.8M | default: | 310 | 54.8M | ptr += MINBPC(enc); | 311 | 54.8M | break; | 312 | 69.8M | } | 313 | 69.8M | } | 314 | 1.13k | return XML_TOK_PARTIAL; | 315 | 25.2k | case BT_QUEST: | 316 | 25.2k | if (! PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { | 317 | 12 | *nextTokPtr = ptr; | 318 | 12 | return XML_TOK_INVALID; | 319 | 12 | } | 320 | 25.2k | ptr += MINBPC(enc); | 321 | 25.2k | REQUIRE_CHAR(enc, ptr, end); | 322 | 25.1k | if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { | 323 | 25.0k | *nextTokPtr = ptr + MINBPC(enc); | 324 | 25.0k | return tok; | 325 | 25.0k | } | 326 | | /* fall through */ | 327 | 234 | default: | 328 | 234 | *nextTokPtr = ptr; | 329 | 234 | return XML_TOK_INVALID; | 330 | 15.9M | } | 331 | 15.9M | } | 332 | 550 | return XML_TOK_PARTIAL; | 333 | 124k | } |
Line | Count | Source | 277 | 131k | const char **nextTokPtr) { | 278 | 131k | int tok; | 279 | 131k | const char *target = ptr; | 280 | 131k | REQUIRE_CHAR(enc, ptr, end); | 281 | 131k | switch (BYTE_TYPE(enc, ptr)) { | 282 | 126k | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) | 283 | 111 | default: | 284 | 111 | *nextTokPtr = ptr; | 285 | 111 | return XML_TOK_INVALID; | 286 | 131k | } | 287 | 322k | while (HAS_CHAR(enc, ptr, end)) { | 288 | 322k | switch (BYTE_TYPE(enc, ptr)) { | 289 | 823k | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) | 290 | 16.2k | case BT_S: | 291 | 76.0k | case BT_CR: | 292 | 120k | case BT_LF: | 293 | 120k | if (! PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { | 294 | 12 | *nextTokPtr = ptr; | 295 | 12 | return XML_TOK_INVALID; | 296 | 12 | } | 297 | 120k | ptr += MINBPC(enc); | 298 | 40.1M | while (HAS_CHAR(enc, ptr, end)) { | 299 | 40.1M | switch (BYTE_TYPE(enc, ptr)) { | 300 | 3.34M | INVALID_CASES(ptr, nextTokPtr) | 301 | 175k | case BT_QUEST: | 302 | 175k | ptr += MINBPC(enc); | 303 | 175k | REQUIRE_CHAR(enc, ptr, end); | 304 | 174k | if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { | 305 | 115k | *nextTokPtr = ptr + MINBPC(enc); | 306 | 115k | return tok; | 307 | 115k | } | 308 | 59.9k | break; | 309 | 38.2M | default: | 310 | 38.2M | ptr += MINBPC(enc); | 311 | 38.2M | break; | 312 | 40.1M | } | 313 | 40.1M | } | 314 | 4.46k | return XML_TOK_PARTIAL; | 315 | 10.2k | case BT_QUEST: | 316 | 10.2k | if (! PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { | 317 | 12 | *nextTokPtr = ptr; | 318 | 12 | return XML_TOK_INVALID; | 319 | 12 | } | 320 | 10.1k | ptr += MINBPC(enc); | 321 | 10.1k | REQUIRE_CHAR(enc, ptr, end); | 322 | 10.0k | if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { | 323 | 9.95k | *nextTokPtr = ptr + MINBPC(enc); | 324 | 9.95k | return tok; | 325 | 9.95k | } | 326 | | /* fall through */ | 327 | 182 | default: | 328 | 182 | *nextTokPtr = ptr; | 329 | 182 | return XML_TOK_INVALID; | 330 | 322k | } | 331 | 322k | } | 332 | 380 | return XML_TOK_PARTIAL; | 333 | 131k | } |
Line | Count | Source | 277 | 258k | const char **nextTokPtr) { | 278 | 258k | int tok; | 279 | 258k | const char *target = ptr; | 280 | 258k | REQUIRE_CHAR(enc, ptr, end); | 281 | 257k | switch (BYTE_TYPE(enc, ptr)) { | 282 | 241k | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) | 283 | 98 | default: | 284 | 98 | *nextTokPtr = ptr; | 285 | 98 | return XML_TOK_INVALID; | 286 | 257k | } | 287 | 1.77M | while (HAS_CHAR(enc, ptr, end)) { | 288 | 1.77M | switch (BYTE_TYPE(enc, ptr)) { | 289 | 7.22M | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) | 290 | 5.09k | case BT_S: | 291 | 119k | case BT_CR: | 292 | 215k | case BT_LF: | 293 | 215k | if (! PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { | 294 | 12 | *nextTokPtr = ptr; | 295 | 12 | return XML_TOK_INVALID; | 296 | 12 | } | 297 | 215k | ptr += MINBPC(enc); | 298 | 15.4M | while (HAS_CHAR(enc, ptr, end)) { | 299 | 15.4M | switch (BYTE_TYPE(enc, ptr)) { | 300 | 953k | INVALID_CASES(ptr, nextTokPtr) | 301 | 284k | case BT_QUEST: | 302 | 284k | ptr += MINBPC(enc); | 303 | 284k | REQUIRE_CHAR(enc, ptr, end); | 304 | 284k | if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { | 305 | 210k | *nextTokPtr = ptr + MINBPC(enc); | 306 | 210k | return tok; | 307 | 210k | } | 308 | 73.5k | break; | 309 | 14.7M | default: | 310 | 14.7M | ptr += MINBPC(enc); | 311 | 14.7M | break; | 312 | 15.4M | } | 313 | 15.4M | } | 314 | 3.48k | return XML_TOK_PARTIAL; | 315 | 41.8k | case BT_QUEST: | 316 | 41.8k | if (! PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { | 317 | 12 | *nextTokPtr = ptr; | 318 | 12 | return XML_TOK_INVALID; | 319 | 12 | } | 320 | 41.8k | ptr += MINBPC(enc); | 321 | 41.8k | REQUIRE_CHAR(enc, ptr, end); | 322 | 41.7k | if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { | 323 | 41.6k | *nextTokPtr = ptr + MINBPC(enc); | 324 | 41.6k | return tok; | 325 | 41.6k | } | 326 | | /* fall through */ | 327 | 188 | default: | 328 | 188 | *nextTokPtr = ptr; | 329 | 188 | return XML_TOK_INVALID; | 330 | 1.77M | } | 331 | 1.77M | } | 332 | 522 | return XML_TOK_PARTIAL; | 333 | 257k | } |
|
334 | | |
335 | | static int PTRCALL |
336 | | PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *end, |
337 | 1.43M | const char **nextTokPtr) { |
338 | 1.43M | static const char CDATA_LSQB[] |
339 | 1.43M | = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, ASCII_LSQB}; |
340 | 1.43M | int i; |
341 | 1.43M | UNUSED_P(enc); |
342 | | /* CDATA[ */ |
343 | 1.43M | REQUIRE_CHARS(enc, ptr, end, 6); |
344 | 10.0M | for (i = 0; i < 6; i++, ptr += MINBPC(enc)) { |
345 | 8.58M | if (! CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) { |
346 | 496 | *nextTokPtr = ptr; |
347 | 496 | return XML_TOK_INVALID; |
348 | 496 | } |
349 | 8.58M | } |
350 | 1.43M | *nextTokPtr = ptr; |
351 | 1.43M | return XML_TOK_CDATA_SECT_OPEN; |
352 | 1.43M | } xmltok.c:normal_scanCdataSection Line | Count | Source | 337 | 1.42M | const char **nextTokPtr) { | 338 | 1.42M | static const char CDATA_LSQB[] | 339 | 1.42M | = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, ASCII_LSQB}; | 340 | 1.42M | int i; | 341 | 1.42M | UNUSED_P(enc); | 342 | | /* CDATA[ */ | 343 | 1.42M | REQUIRE_CHARS(enc, ptr, end, 6); | 344 | 9.94M | for (i = 0; i < 6; i++, ptr += MINBPC(enc)) { | 345 | 8.52M | if (! CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) { | 346 | 190 | *nextTokPtr = ptr; | 347 | 190 | return XML_TOK_INVALID; | 348 | 190 | } | 349 | 8.52M | } | 350 | 1.42M | *nextTokPtr = ptr; | 351 | 1.42M | return XML_TOK_CDATA_SECT_OPEN; | 352 | 1.42M | } |
xmltok.c:little2_scanCdataSection Line | Count | Source | 337 | 1.51k | const char **nextTokPtr) { | 338 | 1.51k | static const char CDATA_LSQB[] | 339 | 1.51k | = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, ASCII_LSQB}; | 340 | 1.51k | int i; | 341 | 1.51k | UNUSED_P(enc); | 342 | | /* CDATA[ */ | 343 | 1.51k | REQUIRE_CHARS(enc, ptr, end, 6); | 344 | 9.80k | for (i = 0; i < 6; i++, ptr += MINBPC(enc)) { | 345 | 8.47k | if (! CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) { | 346 | 141 | *nextTokPtr = ptr; | 347 | 141 | return XML_TOK_INVALID; | 348 | 141 | } | 349 | 8.47k | } | 350 | 1.33k | *nextTokPtr = ptr; | 351 | 1.33k | return XML_TOK_CDATA_SECT_OPEN; | 352 | 1.48k | } |
xmltok.c:big2_scanCdataSection Line | Count | Source | 337 | 9.20k | const char **nextTokPtr) { | 338 | 9.20k | static const char CDATA_LSQB[] | 339 | 9.20k | = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, ASCII_LSQB}; | 340 | 9.20k | int i; | 341 | 9.20k | UNUSED_P(enc); | 342 | | /* CDATA[ */ | 343 | 9.20k | REQUIRE_CHARS(enc, ptr, end, 6); | 344 | 63.2k | for (i = 0; i < 6; i++, ptr += MINBPC(enc)) { | 345 | 54.3k | if (! CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) { | 346 | 165 | *nextTokPtr = ptr; | 347 | 165 | return XML_TOK_INVALID; | 348 | 165 | } | 349 | 54.3k | } | 350 | 8.96k | *nextTokPtr = ptr; | 351 | 8.96k | return XML_TOK_CDATA_SECT_OPEN; | 352 | 9.12k | } |
|
353 | | |
354 | | static int PTRCALL |
355 | | PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *end, |
356 | 6.33M | const char **nextTokPtr) { |
357 | 6.33M | if (ptr >= end) |
358 | 5.17k | return XML_TOK_NONE; |
359 | 6.33M | if (MINBPC(enc) > 1) { |
360 | 1.12M | size_t n = end - ptr; |
361 | 1.12M | if (n & (MINBPC(enc) - 1)) { |
362 | 28.1k | n &= ~(MINBPC(enc) - 1); |
363 | 28.1k | if (n == 0) |
364 | 495 | return XML_TOK_PARTIAL; |
365 | 27.6k | end = ptr + n; |
366 | 27.6k | } |
367 | 1.12M | } |
368 | 6.32M | switch (BYTE_TYPE(enc, ptr)) { |
369 | 3.48M | case BT_RSQB: |
370 | 3.48M | ptr += MINBPC(enc); |
371 | 3.48M | REQUIRE_CHAR(enc, ptr, end); |
372 | 3.48M | if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB)) |
373 | 226k | break; |
374 | 3.25M | ptr += MINBPC(enc); |
375 | 3.25M | REQUIRE_CHAR(enc, ptr, end); |
376 | 3.25M | if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
377 | 1.82M | ptr -= MINBPC(enc); |
378 | 1.82M | break; |
379 | 1.82M | } |
380 | 1.42M | *nextTokPtr = ptr + MINBPC(enc); |
381 | 1.42M | return XML_TOK_CDATA_SECT_CLOSE; |
382 | 1.86M | case BT_CR: |
383 | 1.86M | ptr += MINBPC(enc); |
384 | 1.86M | REQUIRE_CHAR(enc, ptr, end); |
385 | 1.85M | if (BYTE_TYPE(enc, ptr) == BT_LF) |
386 | 9.67k | ptr += MINBPC(enc); |
387 | 1.85M | *nextTokPtr = ptr; |
388 | 1.85M | return XML_TOK_DATA_NEWLINE; |
389 | 292k | case BT_LF: |
390 | 292k | *nextTokPtr = ptr + MINBPC(enc); |
391 | 292k | return XML_TOK_DATA_NEWLINE; |
392 | 705k | INVALID_CASES(ptr, nextTokPtr) |
393 | 338k | default: |
394 | 338k | ptr += MINBPC(enc); |
395 | 338k | break; |
396 | 6.32M | } |
397 | 40.7M | while (HAS_CHAR(enc, ptr, end)) { |
398 | 40.7M | switch (BYTE_TYPE(enc, ptr)) { |
399 | 0 | # define LEAD_CASE(n) \ |
400 | 26.0M | case BT_LEAD##n: \ |
401 | 26.0M | if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ |
402 | 1.16k | *nextTokPtr = ptr; \ |
403 | 1.16k | return XML_TOK_DATA_CHARS; \ |
404 | 1.16k | } \ |
405 | 26.0M | ptr += n; \ |
406 | 26.0M | break; |
407 | 13.8M | LEAD_CASE(2) |
408 | 10.6M | LEAD_CASE(3) |
409 | 1.56M | LEAD_CASE(4) |
410 | 0 | # undef LEAD_CASE |
411 | 305 | case BT_NONXML: |
412 | 333 | case BT_MALFORM: |
413 | 431 | case BT_TRAIL: |
414 | 417k | case BT_CR: |
415 | 686k | case BT_LF: |
416 | 2.73M | case BT_RSQB: |
417 | 2.73M | *nextTokPtr = ptr; |
418 | 2.73M | return XML_TOK_DATA_CHARS; |
419 | 11.9M | default: |
420 | 11.9M | ptr += MINBPC(enc); |
421 | 11.9M | break; |
422 | 40.7M | } |
423 | 40.7M | } |
424 | 4.73k | *nextTokPtr = ptr; |
425 | 4.73k | return XML_TOK_DATA_CHARS; |
426 | 2.74M | } xmltok.c:normal_cdataSectionTok Line | Count | Source | 356 | 5.20M | const char **nextTokPtr) { | 357 | 5.20M | if (ptr >= end) | 358 | 2.18k | return XML_TOK_NONE; | 359 | 5.20M | if (MINBPC(enc) > 1) { | 360 | 0 | size_t n = end - ptr; | 361 | 0 | if (n & (MINBPC(enc) - 1)) { | 362 | 0 | n &= ~(MINBPC(enc) - 1); | 363 | 0 | if (n == 0) | 364 | 0 | return XML_TOK_PARTIAL; | 365 | 0 | end = ptr + n; | 366 | 0 | } | 367 | 0 | } | 368 | 5.20M | switch (BYTE_TYPE(enc, ptr)) { | 369 | 3.16M | case BT_RSQB: | 370 | 3.16M | ptr += MINBPC(enc); | 371 | 3.16M | REQUIRE_CHAR(enc, ptr, end); | 372 | 3.16M | if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB)) | 373 | 67.7k | break; | 374 | 3.10M | ptr += MINBPC(enc); | 375 | 3.10M | REQUIRE_CHAR(enc, ptr, end); | 376 | 3.10M | if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) { | 377 | 1.68M | ptr -= MINBPC(enc); | 378 | 1.68M | break; | 379 | 1.68M | } | 380 | 1.41M | *nextTokPtr = ptr + MINBPC(enc); | 381 | 1.41M | return XML_TOK_CDATA_SECT_CLOSE; | 382 | 1.14M | case BT_CR: | 383 | 1.14M | ptr += MINBPC(enc); | 384 | 1.14M | REQUIRE_CHAR(enc, ptr, end); | 385 | 1.14M | if (BYTE_TYPE(enc, ptr) == BT_LF) | 386 | 1.36k | ptr += MINBPC(enc); | 387 | 1.14M | *nextTokPtr = ptr; | 388 | 1.14M | return XML_TOK_DATA_NEWLINE; | 389 | 259k | case BT_LF: | 390 | 259k | *nextTokPtr = ptr + MINBPC(enc); | 391 | 259k | return XML_TOK_DATA_NEWLINE; | 392 | 694k | INVALID_CASES(ptr, nextTokPtr) | 393 | 280k | default: | 394 | 280k | ptr += MINBPC(enc); | 395 | 280k | break; | 396 | 5.20M | } | 397 | 34.4M | while (HAS_CHAR(enc, ptr, end)) { | 398 | 34.4M | switch (BYTE_TYPE(enc, ptr)) { | 399 | 0 | # define LEAD_CASE(n) \ | 400 | 0 | case BT_LEAD##n: \ | 401 | 0 | if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ | 402 | 0 | *nextTokPtr = ptr; \ | 403 | 0 | return XML_TOK_DATA_CHARS; \ | 404 | 0 | } \ | 405 | 0 | ptr += n; \ | 406 | 0 | break; | 407 | 13.8M | LEAD_CASE(2) | 408 | 10.6M | LEAD_CASE(3) | 409 | 856k | LEAD_CASE(4) | 410 | 0 | # undef LEAD_CASE | 411 | 87 | case BT_NONXML: | 412 | 115 | case BT_MALFORM: | 413 | 151 | case BT_TRAIL: | 414 | 390k | case BT_CR: | 415 | 631k | case BT_LF: | 416 | 2.37M | case BT_RSQB: | 417 | 2.37M | *nextTokPtr = ptr; | 418 | 2.37M | return XML_TOK_DATA_CHARS; | 419 | 6.69M | default: | 420 | 6.69M | ptr += MINBPC(enc); | 421 | 6.69M | break; | 422 | 34.4M | } | 423 | 34.4M | } | 424 | 1.96k | *nextTokPtr = ptr; | 425 | 1.96k | return XML_TOK_DATA_CHARS; | 426 | 2.37M | } |
xmltok.c:little2_cdataSectionTok Line | Count | Source | 356 | 682k | const char **nextTokPtr) { | 357 | 682k | if (ptr >= end) | 358 | 1.55k | return XML_TOK_NONE; | 359 | 680k | if (MINBPC(enc) > 1) { | 360 | 680k | size_t n = end - ptr; | 361 | 680k | if (n & (MINBPC(enc) - 1)) { | 362 | 12.6k | n &= ~(MINBPC(enc) - 1); | 363 | 12.6k | if (n == 0) | 364 | 233 | return XML_TOK_PARTIAL; | 365 | 12.4k | end = ptr + n; | 366 | 12.4k | } | 367 | 680k | } | 368 | 680k | switch (BYTE_TYPE(enc, ptr)) { | 369 | 251k | case BT_RSQB: | 370 | 251k | ptr += MINBPC(enc); | 371 | 251k | REQUIRE_CHAR(enc, ptr, end); | 372 | 250k | if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB)) | 373 | 130k | break; | 374 | 120k | ptr += MINBPC(enc); | 375 | 120k | REQUIRE_CHAR(enc, ptr, end); | 376 | 120k | if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) { | 377 | 119k | ptr -= MINBPC(enc); | 378 | 119k | break; | 379 | 119k | } | 380 | 659 | *nextTokPtr = ptr + MINBPC(enc); | 381 | 659 | return XML_TOK_CDATA_SECT_CLOSE; | 382 | 367k | case BT_CR: | 383 | 367k | ptr += MINBPC(enc); | 384 | 367k | REQUIRE_CHAR(enc, ptr, end); | 385 | 367k | if (BYTE_TYPE(enc, ptr) == BT_LF) | 386 | 2.08k | ptr += MINBPC(enc); | 387 | 367k | *nextTokPtr = ptr; | 388 | 367k | return XML_TOK_DATA_NEWLINE; | 389 | 23.7k | case BT_LF: | 390 | 23.7k | *nextTokPtr = ptr + MINBPC(enc); | 391 | 23.7k | return XML_TOK_DATA_NEWLINE; | 392 | 23.7k | INVALID_CASES(ptr, nextTokPtr) | 393 | 34.6k | default: | 394 | 34.6k | ptr += MINBPC(enc); | 395 | 34.6k | break; | 396 | 680k | } | 397 | 3.68M | while (HAS_CHAR(enc, ptr, end)) { | 398 | 3.68M | switch (BYTE_TYPE(enc, ptr)) { | 399 | 0 | # define LEAD_CASE(n) \ | 400 | 0 | case BT_LEAD##n: \ | 401 | 0 | if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ | 402 | 0 | *nextTokPtr = ptr; \ | 403 | 0 | return XML_TOK_DATA_CHARS; \ | 404 | 0 | } \ | 405 | 0 | ptr += n; \ | 406 | 0 | break; | 407 | 0 | LEAD_CASE(2) | 408 | 0 | LEAD_CASE(3) | 409 | 301k | LEAD_CASE(4) | 410 | 0 | # undef LEAD_CASE | 411 | 85 | case BT_NONXML: | 412 | 85 | case BT_MALFORM: | 413 | 117 | case BT_TRAIL: | 414 | 17.4k | case BT_CR: | 415 | 37.5k | case BT_LF: | 416 | 286k | case BT_RSQB: | 417 | 286k | *nextTokPtr = ptr; | 418 | 286k | return XML_TOK_DATA_CHARS; | 419 | 3.09M | default: | 420 | 3.09M | ptr += MINBPC(enc); | 421 | 3.09M | break; | 422 | 3.68M | } | 423 | 3.68M | } | 424 | 1.43k | *nextTokPtr = ptr; | 425 | 1.43k | return XML_TOK_DATA_CHARS; | 426 | 287k | } |
xmltok.c:big2_cdataSectionTok Line | Count | Source | 356 | 447k | const char **nextTokPtr) { | 357 | 447k | if (ptr >= end) | 358 | 1.42k | return XML_TOK_NONE; | 359 | 445k | if (MINBPC(enc) > 1) { | 360 | 445k | size_t n = end - ptr; | 361 | 445k | if (n & (MINBPC(enc) - 1)) { | 362 | 15.4k | n &= ~(MINBPC(enc) - 1); | 363 | 15.4k | if (n == 0) | 364 | 262 | return XML_TOK_PARTIAL; | 365 | 15.1k | end = ptr + n; | 366 | 15.1k | } | 367 | 445k | } | 368 | 445k | switch (BYTE_TYPE(enc, ptr)) { | 369 | 64.4k | case BT_RSQB: | 370 | 64.4k | ptr += MINBPC(enc); | 371 | 64.4k | REQUIRE_CHAR(enc, ptr, end); | 372 | 64.4k | if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB)) | 373 | 28.3k | break; | 374 | 36.0k | ptr += MINBPC(enc); | 375 | 36.0k | REQUIRE_CHAR(enc, ptr, end); | 376 | 36.0k | if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) { | 377 | 27.7k | ptr -= MINBPC(enc); | 378 | 27.7k | break; | 379 | 27.7k | } | 380 | 8.24k | *nextTokPtr = ptr + MINBPC(enc); | 381 | 8.24k | return XML_TOK_CDATA_SECT_CLOSE; | 382 | 344k | case BT_CR: | 383 | 344k | ptr += MINBPC(enc); | 384 | 344k | REQUIRE_CHAR(enc, ptr, end); | 385 | 344k | if (BYTE_TYPE(enc, ptr) == BT_LF) | 386 | 6.22k | ptr += MINBPC(enc); | 387 | 344k | *nextTokPtr = ptr; | 388 | 344k | return XML_TOK_DATA_NEWLINE; | 389 | 9.81k | case BT_LF: | 390 | 9.81k | *nextTokPtr = ptr + MINBPC(enc); | 391 | 9.81k | return XML_TOK_DATA_NEWLINE; | 392 | 9.81k | INVALID_CASES(ptr, nextTokPtr) | 393 | 23.8k | default: | 394 | 23.8k | ptr += MINBPC(enc); | 395 | 23.8k | break; | 396 | 445k | } | 397 | 2.67M | while (HAS_CHAR(enc, ptr, end)) { | 398 | 2.67M | switch (BYTE_TYPE(enc, ptr)) { | 399 | 0 | # define LEAD_CASE(n) \ | 400 | 0 | case BT_LEAD##n: \ | 401 | 0 | if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ | 402 | 0 | *nextTokPtr = ptr; \ | 403 | 0 | return XML_TOK_DATA_CHARS; \ | 404 | 0 | } \ | 405 | 0 | ptr += n; \ | 406 | 0 | break; | 407 | 0 | LEAD_CASE(2) | 408 | 0 | LEAD_CASE(3) | 409 | 403k | LEAD_CASE(4) | 410 | 0 | # undef LEAD_CASE | 411 | 133 | case BT_NONXML: | 412 | 133 | case BT_MALFORM: | 413 | 163 | case BT_TRAIL: | 414 | 8.84k | case BT_CR: | 415 | 17.0k | case BT_LF: | 416 | 80.1k | case BT_RSQB: | 417 | 80.1k | *nextTokPtr = ptr; | 418 | 80.1k | return XML_TOK_DATA_CHARS; | 419 | 2.18M | default: | 420 | 2.18M | ptr += MINBPC(enc); | 421 | 2.18M | break; | 422 | 2.67M | } | 423 | 2.67M | } | 424 | 1.34k | *nextTokPtr = ptr; | 425 | 1.34k | return XML_TOK_DATA_CHARS; | 426 | 81.8k | } |
|
427 | | |
428 | | /* ptr points to character following "</" */ |
429 | | |
430 | | static int PTRCALL |
431 | | PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end, |
432 | 199k | const char **nextTokPtr) { |
433 | 199k | REQUIRE_CHAR(enc, ptr, end); |
434 | 199k | switch (BYTE_TYPE(enc, ptr)) { |
435 | 104k | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
436 | 97 | default: |
437 | 97 | *nextTokPtr = ptr; |
438 | 97 | return XML_TOK_INVALID; |
439 | 199k | } |
440 | 10.9M | while (HAS_CHAR(enc, ptr, end)) { |
441 | 10.9M | switch (BYTE_TYPE(enc, ptr)) { |
442 | 22.3M | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
443 | 2.06k | case BT_S: |
444 | 43.0k | case BT_CR: |
445 | 44.9k | case BT_LF: |
446 | 1.31M | for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { |
447 | 1.31M | switch (BYTE_TYPE(enc, ptr)) { |
448 | 3.12k | case BT_S: |
449 | 1.26M | case BT_CR: |
450 | 1.26M | case BT_LF: |
451 | 1.26M | break; |
452 | 44.3k | case BT_GT: |
453 | 44.3k | *nextTokPtr = ptr + MINBPC(enc); |
454 | 44.3k | return XML_TOK_END_TAG; |
455 | 210 | default: |
456 | 210 | *nextTokPtr = ptr; |
457 | 210 | return XML_TOK_INVALID; |
458 | 1.31M | } |
459 | 1.31M | } |
460 | 384 | return XML_TOK_PARTIAL; |
461 | 0 | # ifdef XML_NS |
462 | 7.55k | case BT_COLON: |
463 | | /* no need to check qname syntax here, |
464 | | since end-tag must match exactly */ |
465 | 7.55k | ptr += MINBPC(enc); |
466 | 7.55k | break; |
467 | 0 | # endif |
468 | 152k | case BT_GT: |
469 | 152k | *nextTokPtr = ptr + MINBPC(enc); |
470 | 152k | return XML_TOK_END_TAG; |
471 | 206 | default: |
472 | 206 | *nextTokPtr = ptr; |
473 | 206 | return XML_TOK_INVALID; |
474 | 10.9M | } |
475 | 10.9M | } |
476 | 897 | return XML_TOK_PARTIAL; |
477 | 199k | } xmltok.c:normal_scanEndTag Line | Count | Source | 432 | 155k | const char **nextTokPtr) { | 433 | 155k | REQUIRE_CHAR(enc, ptr, end); | 434 | 155k | switch (BYTE_TYPE(enc, ptr)) { | 435 | 59.8k | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) | 436 | 21 | default: | 437 | 21 | *nextTokPtr = ptr; | 438 | 21 | return XML_TOK_INVALID; | 439 | 155k | } | 440 | 10.6M | while (HAS_CHAR(enc, ptr, end)) { | 441 | 10.6M | switch (BYTE_TYPE(enc, ptr)) { | 442 | 21.5M | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) | 443 | 995 | case BT_S: | 444 | 1.14k | case BT_CR: | 445 | 2.69k | case BT_LF: | 446 | 1.23M | for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { | 447 | 1.23M | switch (BYTE_TYPE(enc, ptr)) { | 448 | 2.36k | case BT_S: | 449 | 1.22M | case BT_CR: | 450 | 1.23M | case BT_LF: | 451 | 1.23M | break; | 452 | 2.45k | case BT_GT: | 453 | 2.45k | *nextTokPtr = ptr + MINBPC(enc); | 454 | 2.45k | return XML_TOK_END_TAG; | 455 | 97 | default: | 456 | 97 | *nextTokPtr = ptr; | 457 | 97 | return XML_TOK_INVALID; | 458 | 1.23M | } | 459 | 1.23M | } | 460 | 144 | return XML_TOK_PARTIAL; | 461 | 0 | # ifdef XML_NS | 462 | 7.07k | case BT_COLON: | 463 | | /* no need to check qname syntax here, | 464 | | since end-tag must match exactly */ | 465 | 7.07k | ptr += MINBPC(enc); | 466 | 7.07k | break; | 467 | 0 | # endif | 468 | 151k | case BT_GT: | 469 | 151k | *nextTokPtr = ptr + MINBPC(enc); | 470 | 151k | return XML_TOK_END_TAG; | 471 | 112 | default: | 472 | 112 | *nextTokPtr = ptr; | 473 | 112 | return XML_TOK_INVALID; | 474 | 10.6M | } | 475 | 10.6M | } | 476 | 363 | return XML_TOK_PARTIAL; | 477 | 155k | } |
xmltok.c:little2_scanEndTag Line | Count | Source | 432 | 2.03k | const char **nextTokPtr) { | 433 | 2.03k | REQUIRE_CHAR(enc, ptr, end); | 434 | 2.02k | switch (BYTE_TYPE(enc, ptr)) { | 435 | 2.20k | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) | 436 | 38 | default: | 437 | 38 | *nextTokPtr = ptr; | 438 | 38 | return XML_TOK_INVALID; | 439 | 2.02k | } | 440 | 51.6k | while (HAS_CHAR(enc, ptr, end)) { | 441 | 51.6k | switch (BYTE_TYPE(enc, ptr)) { | 442 | 244k | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) | 443 | 593 | case BT_S: | 444 | 989 | case BT_CR: | 445 | 1.15k | case BT_LF: | 446 | 14.5k | for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { | 447 | 14.5k | switch (BYTE_TYPE(enc, ptr)) { | 448 | 384 | case BT_S: | 449 | 13.1k | case BT_CR: | 450 | 13.4k | case BT_LF: | 451 | 13.4k | break; | 452 | 998 | case BT_GT: | 453 | 998 | *nextTokPtr = ptr + MINBPC(enc); | 454 | 998 | return XML_TOK_END_TAG; | 455 | 60 | default: | 456 | 60 | *nextTokPtr = ptr; | 457 | 60 | return XML_TOK_INVALID; | 458 | 14.5k | } | 459 | 14.5k | } | 460 | 95 | return XML_TOK_PARTIAL; | 461 | 0 | # ifdef XML_NS | 462 | 262 | case BT_COLON: | 463 | | /* no need to check qname syntax here, | 464 | | since end-tag must match exactly */ | 465 | 262 | ptr += MINBPC(enc); | 466 | 262 | break; | 467 | 0 | # endif | 468 | 361 | case BT_GT: | 469 | 361 | *nextTokPtr = ptr + MINBPC(enc); | 470 | 361 | return XML_TOK_END_TAG; | 471 | 50 | default: | 472 | 50 | *nextTokPtr = ptr; | 473 | 50 | return XML_TOK_INVALID; | 474 | 51.6k | } | 475 | 51.6k | } | 476 | 240 | return XML_TOK_PARTIAL; | 477 | 1.92k | } |
Line | Count | Source | 432 | 42.1k | const char **nextTokPtr) { | 433 | 42.1k | REQUIRE_CHAR(enc, ptr, end); | 434 | 42.1k | switch (BYTE_TYPE(enc, ptr)) { | 435 | 42.2k | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) | 436 | 38 | default: | 437 | 38 | *nextTokPtr = ptr; | 438 | 38 | return XML_TOK_INVALID; | 439 | 42.1k | } | 440 | 190k | while (HAS_CHAR(enc, ptr, end)) { | 441 | 190k | switch (BYTE_TYPE(enc, ptr)) { | 442 | 585k | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) | 443 | 478 | case BT_S: | 444 | 40.8k | case BT_CR: | 445 | 41.1k | case BT_LF: | 446 | 66.5k | for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { | 447 | 66.5k | switch (BYTE_TYPE(enc, ptr)) { | 448 | 377 | case BT_S: | 449 | 25.3k | case BT_CR: | 450 | 25.5k | case BT_LF: | 451 | 25.5k | break; | 452 | 40.9k | case BT_GT: | 453 | 40.9k | *nextTokPtr = ptr + MINBPC(enc); | 454 | 40.9k | return XML_TOK_END_TAG; | 455 | 53 | default: | 456 | 53 | *nextTokPtr = ptr; | 457 | 53 | return XML_TOK_INVALID; | 458 | 66.5k | } | 459 | 66.5k | } | 460 | 145 | return XML_TOK_PARTIAL; | 461 | 0 | # ifdef XML_NS | 462 | 216 | case BT_COLON: | 463 | | /* no need to check qname syntax here, | 464 | | since end-tag must match exactly */ | 465 | 216 | ptr += MINBPC(enc); | 466 | 216 | break; | 467 | 0 | # endif | 468 | 408 | case BT_GT: | 469 | 408 | *nextTokPtr = ptr + MINBPC(enc); | 470 | 408 | return XML_TOK_END_TAG; | 471 | 44 | default: | 472 | 44 | *nextTokPtr = ptr; | 473 | 44 | return XML_TOK_INVALID; | 474 | 190k | } | 475 | 190k | } | 476 | 294 | return XML_TOK_PARTIAL; | 477 | 42.0k | } |
|
478 | | |
479 | | /* ptr points to character following "&#X" */ |
480 | | |
481 | | static int PTRCALL |
482 | | PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end, |
483 | 566k | const char **nextTokPtr) { |
484 | 566k | if (HAS_CHAR(enc, ptr, end)) { |
485 | 566k | switch (BYTE_TYPE(enc, ptr)) { |
486 | 74.4k | case BT_DIGIT: |
487 | 566k | case BT_HEX: |
488 | 566k | break; |
489 | 79 | default: |
490 | 79 | *nextTokPtr = ptr; |
491 | 79 | return XML_TOK_INVALID; |
492 | 566k | } |
493 | 1.34M | for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { |
494 | 1.34M | switch (BYTE_TYPE(enc, ptr)) { |
495 | 59.1k | case BT_DIGIT: |
496 | 781k | case BT_HEX: |
497 | 781k | break; |
498 | 565k | case BT_SEMI: |
499 | 565k | *nextTokPtr = ptr + MINBPC(enc); |
500 | 565k | return XML_TOK_CHAR_REF; |
501 | 138 | default: |
502 | 138 | *nextTokPtr = ptr; |
503 | 138 | return XML_TOK_INVALID; |
504 | 1.34M | } |
505 | 1.34M | } |
506 | 566k | } |
507 | 1.03k | return XML_TOK_PARTIAL; |
508 | 566k | } xmltok.c:normal_scanHexCharRef Line | Count | Source | 483 | 373k | const char **nextTokPtr) { | 484 | 373k | if (HAS_CHAR(enc, ptr, end)) { | 485 | 373k | switch (BYTE_TYPE(enc, ptr)) { | 486 | 38.0k | case BT_DIGIT: | 487 | 373k | case BT_HEX: | 488 | 373k | break; | 489 | 29 | default: | 490 | 29 | *nextTokPtr = ptr; | 491 | 29 | return XML_TOK_INVALID; | 492 | 373k | } | 493 | 475k | for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { | 494 | 475k | switch (BYTE_TYPE(enc, ptr)) { | 495 | 20.1k | case BT_DIGIT: | 496 | 101k | case BT_HEX: | 497 | 101k | break; | 498 | 373k | case BT_SEMI: | 499 | 373k | *nextTokPtr = ptr + MINBPC(enc); | 500 | 373k | return XML_TOK_CHAR_REF; | 501 | 35 | default: | 502 | 35 | *nextTokPtr = ptr; | 503 | 35 | return XML_TOK_INVALID; | 504 | 475k | } | 505 | 475k | } | 506 | 373k | } | 507 | 271 | return XML_TOK_PARTIAL; | 508 | 373k | } |
xmltok.c:little2_scanHexCharRef Line | Count | Source | 483 | 45.4k | const char **nextTokPtr) { | 484 | 45.4k | if (HAS_CHAR(enc, ptr, end)) { | 485 | 45.3k | switch (BYTE_TYPE(enc, ptr)) { | 486 | 13.6k | case BT_DIGIT: | 487 | 45.3k | case BT_HEX: | 488 | 45.3k | break; | 489 | 35 | default: | 490 | 35 | *nextTokPtr = ptr; | 491 | 35 | return XML_TOK_INVALID; | 492 | 45.3k | } | 493 | 182k | for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { | 494 | 182k | switch (BYTE_TYPE(enc, ptr)) { | 495 | 16.5k | case BT_DIGIT: | 496 | 137k | case BT_HEX: | 497 | 137k | break; | 498 | 45.0k | case BT_SEMI: | 499 | 45.0k | *nextTokPtr = ptr + MINBPC(enc); | 500 | 45.0k | return XML_TOK_CHAR_REF; | 501 | 39 | default: | 502 | 39 | *nextTokPtr = ptr; | 503 | 39 | return XML_TOK_INVALID; | 504 | 182k | } | 505 | 182k | } | 506 | 45.3k | } | 507 | 235 | return XML_TOK_PARTIAL; | 508 | 45.4k | } |
xmltok.c:big2_scanHexCharRef Line | Count | Source | 483 | 147k | const char **nextTokPtr) { | 484 | 147k | if (HAS_CHAR(enc, ptr, end)) { | 485 | 147k | switch (BYTE_TYPE(enc, ptr)) { | 486 | 22.7k | case BT_DIGIT: | 487 | 147k | case BT_HEX: | 488 | 147k | break; | 489 | 15 | default: | 490 | 15 | *nextTokPtr = ptr; | 491 | 15 | return XML_TOK_INVALID; | 492 | 147k | } | 493 | 688k | for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { | 494 | 688k | switch (BYTE_TYPE(enc, ptr)) { | 495 | 22.4k | case BT_DIGIT: | 496 | 541k | case BT_HEX: | 497 | 541k | break; | 498 | 146k | case BT_SEMI: | 499 | 146k | *nextTokPtr = ptr + MINBPC(enc); | 500 | 146k | return XML_TOK_CHAR_REF; | 501 | 64 | default: | 502 | 64 | *nextTokPtr = ptr; | 503 | 64 | return XML_TOK_INVALID; | 504 | 688k | } | 505 | 688k | } | 506 | 147k | } | 507 | 533 | return XML_TOK_PARTIAL; | 508 | 147k | } |
|
509 | | |
510 | | /* ptr points to character following "&#" */ |
511 | | |
512 | | static int PTRCALL |
513 | | PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end, |
514 | 588k | const char **nextTokPtr) { |
515 | 588k | if (HAS_CHAR(enc, ptr, end)) { |
516 | 588k | if (CHAR_MATCHES(enc, ptr, ASCII_x)) |
517 | 566k | return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
518 | 22.1k | switch (BYTE_TYPE(enc, ptr)) { |
519 | 21.9k | case BT_DIGIT: |
520 | 21.9k | break; |
521 | 190 | default: |
522 | 190 | *nextTokPtr = ptr; |
523 | 190 | return XML_TOK_INVALID; |
524 | 22.1k | } |
525 | 89.2k | for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { |
526 | 89.2k | switch (BYTE_TYPE(enc, ptr)) { |
527 | 67.4k | case BT_DIGIT: |
528 | 67.4k | break; |
529 | 21.8k | case BT_SEMI: |
530 | 21.8k | *nextTokPtr = ptr + MINBPC(enc); |
531 | 21.8k | return XML_TOK_CHAR_REF; |
532 | 70 | default: |
533 | 70 | *nextTokPtr = ptr; |
534 | 70 | return XML_TOK_INVALID; |
535 | 89.2k | } |
536 | 89.2k | } |
537 | 21.9k | } |
538 | 360 | return XML_TOK_PARTIAL; |
539 | 588k | } xmltok.c:normal_scanCharRef Line | Count | Source | 514 | 385k | const char **nextTokPtr) { | 515 | 385k | if (HAS_CHAR(enc, ptr, end)) { | 516 | 385k | if (CHAR_MATCHES(enc, ptr, ASCII_x)) | 517 | 373k | return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 518 | 11.8k | switch (BYTE_TYPE(enc, ptr)) { | 519 | 11.8k | case BT_DIGIT: | 520 | 11.8k | break; | 521 | 99 | default: | 522 | 99 | *nextTokPtr = ptr; | 523 | 99 | return XML_TOK_INVALID; | 524 | 11.8k | } | 525 | 62.5k | for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { | 526 | 62.5k | switch (BYTE_TYPE(enc, ptr)) { | 527 | 50.7k | case BT_DIGIT: | 528 | 50.7k | break; | 529 | 11.7k | case BT_SEMI: | 530 | 11.7k | *nextTokPtr = ptr + MINBPC(enc); | 531 | 11.7k | return XML_TOK_CHAR_REF; | 532 | 14 | default: | 533 | 14 | *nextTokPtr = ptr; | 534 | 14 | return XML_TOK_INVALID; | 535 | 62.5k | } | 536 | 62.5k | } | 537 | 11.8k | } | 538 | 149 | return XML_TOK_PARTIAL; | 539 | 385k | } |
xmltok.c:little2_scanCharRef Line | Count | Source | 514 | 55.1k | const char **nextTokPtr) { | 515 | 55.1k | if (HAS_CHAR(enc, ptr, end)) { | 516 | 55.1k | if (CHAR_MATCHES(enc, ptr, ASCII_x)) | 517 | 45.4k | return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 518 | 9.74k | switch (BYTE_TYPE(enc, ptr)) { | 519 | 9.69k | case BT_DIGIT: | 520 | 9.69k | break; | 521 | 46 | default: | 522 | 46 | *nextTokPtr = ptr; | 523 | 46 | return XML_TOK_INVALID; | 524 | 9.74k | } | 525 | 25.4k | for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { | 526 | 25.4k | switch (BYTE_TYPE(enc, ptr)) { | 527 | 15.7k | case BT_DIGIT: | 528 | 15.7k | break; | 529 | 9.65k | case BT_SEMI: | 530 | 9.65k | *nextTokPtr = ptr + MINBPC(enc); | 531 | 9.65k | return XML_TOK_CHAR_REF; | 532 | 24 | default: | 533 | 24 | *nextTokPtr = ptr; | 534 | 24 | return XML_TOK_INVALID; | 535 | 25.4k | } | 536 | 25.4k | } | 537 | 9.69k | } | 538 | 68 | return XML_TOK_PARTIAL; | 539 | 55.1k | } |
xmltok.c:big2_scanCharRef Line | Count | Source | 514 | 148k | const char **nextTokPtr) { | 515 | 148k | if (HAS_CHAR(enc, ptr, end)) { | 516 | 148k | if (CHAR_MATCHES(enc, ptr, ASCII_x)) | 517 | 147k | return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 518 | 523 | switch (BYTE_TYPE(enc, ptr)) { | 519 | 478 | case BT_DIGIT: | 520 | 478 | break; | 521 | 45 | default: | 522 | 45 | *nextTokPtr = ptr; | 523 | 45 | return XML_TOK_INVALID; | 524 | 523 | } | 525 | 1.29k | for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { | 526 | 1.29k | switch (BYTE_TYPE(enc, ptr)) { | 527 | 847 | case BT_DIGIT: | 528 | 847 | break; | 529 | 416 | case BT_SEMI: | 530 | 416 | *nextTokPtr = ptr + MINBPC(enc); | 531 | 416 | return XML_TOK_CHAR_REF; | 532 | 32 | default: | 533 | 32 | *nextTokPtr = ptr; | 534 | 32 | return XML_TOK_INVALID; | 535 | 1.29k | } | 536 | 1.29k | } | 537 | 478 | } | 538 | 143 | return XML_TOK_PARTIAL; | 539 | 148k | } |
|
540 | | |
541 | | /* ptr points to character following "&" */ |
542 | | |
543 | | static int PTRCALL |
544 | | PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end, |
545 | 7.73M | const char **nextTokPtr) { |
546 | 7.73M | REQUIRE_CHAR(enc, ptr, end); |
547 | 7.73M | switch (BYTE_TYPE(enc, ptr)) { |
548 | 6.57M | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
549 | 588k | case BT_NUM: |
550 | 588k | return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
551 | 1.03k | default: |
552 | 1.03k | *nextTokPtr = ptr; |
553 | 1.03k | return XML_TOK_INVALID; |
554 | 7.73M | } |
555 | 33.9M | while (HAS_CHAR(enc, ptr, end)) { |
556 | 33.9M | switch (BYTE_TYPE(enc, ptr)) { |
557 | 91.2M | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
558 | 7.14M | case BT_SEMI: |
559 | 7.14M | *nextTokPtr = ptr + MINBPC(enc); |
560 | 7.14M | return XML_TOK_ENTITY_REF; |
561 | 849 | default: |
562 | 849 | *nextTokPtr = ptr; |
563 | 849 | return XML_TOK_INVALID; |
564 | 33.9M | } |
565 | 33.9M | } |
566 | 1.35k | return XML_TOK_PARTIAL; |
567 | 7.14M | } Line | Count | Source | 545 | 7.13M | const char **nextTokPtr) { | 546 | 7.13M | REQUIRE_CHAR(enc, ptr, end); | 547 | 7.13M | switch (BYTE_TYPE(enc, ptr)) { | 548 | 6.53M | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) | 549 | 385k | case BT_NUM: | 550 | 385k | return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 551 | 880 | default: | 552 | 880 | *nextTokPtr = ptr; | 553 | 880 | return XML_TOK_INVALID; | 554 | 7.13M | } | 555 | 33.1M | while (HAS_CHAR(enc, ptr, end)) { | 556 | 33.1M | switch (BYTE_TYPE(enc, ptr)) { | 557 | 89.3M | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) | 558 | 6.74M | case BT_SEMI: | 559 | 6.74M | *nextTokPtr = ptr + MINBPC(enc); | 560 | 6.74M | return XML_TOK_ENTITY_REF; | 561 | 725 | default: | 562 | 725 | *nextTokPtr = ptr; | 563 | 725 | return XML_TOK_INVALID; | 564 | 33.1M | } | 565 | 33.1M | } | 566 | 718 | return XML_TOK_PARTIAL; | 567 | 6.75M | } |
Line | Count | Source | 545 | 118k | const char **nextTokPtr) { | 546 | 118k | REQUIRE_CHAR(enc, ptr, end); | 547 | 117k | switch (BYTE_TYPE(enc, ptr)) { | 548 | 17.9k | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) | 549 | 55.1k | case BT_NUM: | 550 | 55.1k | return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 551 | 68 | default: | 552 | 68 | *nextTokPtr = ptr; | 553 | 68 | return XML_TOK_INVALID; | 554 | 117k | } | 555 | 157k | while (HAS_CHAR(enc, ptr, end)) { | 556 | 157k | switch (BYTE_TYPE(enc, ptr)) { | 557 | 418k | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) | 558 | 62.0k | case BT_SEMI: | 559 | 62.0k | *nextTokPtr = ptr + MINBPC(enc); | 560 | 62.0k | return XML_TOK_ENTITY_REF; | 561 | 58 | default: | 562 | 58 | *nextTokPtr = ptr; | 563 | 58 | return XML_TOK_INVALID; | 564 | 157k | } | 565 | 157k | } | 566 | 300 | return XML_TOK_PARTIAL; | 567 | 62.5k | } |
Line | Count | Source | 545 | 479k | const char **nextTokPtr) { | 546 | 479k | REQUIRE_CHAR(enc, ptr, end); | 547 | 479k | switch (BYTE_TYPE(enc, ptr)) { | 548 | 21.8k | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) | 549 | 148k | case BT_NUM: | 550 | 148k | return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 551 | 84 | default: | 552 | 84 | *nextTokPtr = ptr; | 553 | 84 | return XML_TOK_INVALID; | 554 | 479k | } | 555 | 641k | while (HAS_CHAR(enc, ptr, end)) { | 556 | 641k | switch (BYTE_TYPE(enc, ptr)) { | 557 | 1.41M | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) | 558 | 330k | case BT_SEMI: | 559 | 330k | *nextTokPtr = ptr + MINBPC(enc); | 560 | 330k | return XML_TOK_ENTITY_REF; | 561 | 66 | default: | 562 | 66 | *nextTokPtr = ptr; | 563 | 66 | return XML_TOK_INVALID; | 564 | 641k | } | 565 | 641k | } | 566 | 341 | return XML_TOK_PARTIAL; | 567 | 331k | } |
|
568 | | |
569 | | /* ptr points to character following first character of attribute name */ |
570 | | |
571 | | static int PTRCALL |
572 | | PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, |
573 | 1.04M | const char **nextTokPtr) { |
574 | 1.04M | # ifdef XML_NS |
575 | 1.04M | int hadColon = 0; |
576 | 1.04M | # endif |
577 | 49.7M | while (HAS_CHAR(enc, ptr, end)) { |
578 | 49.7M | switch (BYTE_TYPE(enc, ptr)) { |
579 | 143M | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
580 | 0 | # ifdef XML_NS |
581 | 200k | case BT_COLON: |
582 | 200k | if (hadColon) { |
583 | 31 | *nextTokPtr = ptr; |
584 | 31 | return XML_TOK_INVALID; |
585 | 31 | } |
586 | 200k | hadColon = 1; |
587 | 200k | ptr += MINBPC(enc); |
588 | 200k | REQUIRE_CHAR(enc, ptr, end); |
589 | 200k | switch (BYTE_TYPE(enc, ptr)) { |
590 | 194k | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
591 | 93 | default: |
592 | 93 | *nextTokPtr = ptr; |
593 | 93 | return XML_TOK_INVALID; |
594 | 200k | } |
595 | 199k | break; |
596 | 199k | # endif |
597 | 199k | case BT_S: |
598 | 10.3k | case BT_CR: |
599 | 34.7k | case BT_LF: |
600 | 632k | for (;;) { |
601 | 632k | int t; |
602 | | |
603 | 632k | ptr += MINBPC(enc); |
604 | 632k | REQUIRE_CHAR(enc, ptr, end); |
605 | 631k | t = BYTE_TYPE(enc, ptr); |
606 | 631k | if (t == BT_EQUALS) |
607 | 33.9k | break; |
608 | 597k | switch (t) { |
609 | 123k | case BT_S: |
610 | 285k | case BT_LF: |
611 | 597k | case BT_CR: |
612 | 597k | break; |
613 | 244 | default: |
614 | 244 | *nextTokPtr = ptr; |
615 | 244 | return XML_TOK_INVALID; |
616 | 597k | } |
617 | 597k | } |
618 | | /* fall through */ |
619 | 5.67M | case BT_EQUALS: { |
620 | 5.67M | int open; |
621 | 5.67M | # ifdef XML_NS |
622 | 5.67M | hadColon = 0; |
623 | 5.67M | # endif |
624 | 6.21M | for (;;) { |
625 | 6.21M | ptr += MINBPC(enc); |
626 | 6.21M | REQUIRE_CHAR(enc, ptr, end); |
627 | 6.21M | open = BYTE_TYPE(enc, ptr); |
628 | 6.21M | if (open == BT_QUOT || open == BT_APOS) |
629 | 5.67M | break; |
630 | 544k | switch (open) { |
631 | 510k | case BT_S: |
632 | 524k | case BT_LF: |
633 | 543k | case BT_CR: |
634 | 543k | break; |
635 | 171 | default: |
636 | 171 | *nextTokPtr = ptr; |
637 | 171 | return XML_TOK_INVALID; |
638 | 544k | } |
639 | 544k | } |
640 | 5.67M | ptr += MINBPC(enc); |
641 | | /* in attribute value */ |
642 | 247M | for (;;) { |
643 | 247M | int t; |
644 | 247M | REQUIRE_CHAR(enc, ptr, end); |
645 | 247M | t = BYTE_TYPE(enc, ptr); |
646 | 247M | if (t == open) |
647 | 5.66M | break; |
648 | 242M | switch (t) { |
649 | 22.0M | INVALID_CASES(ptr, nextTokPtr) |
650 | 390k | case BT_AMP: { |
651 | 390k | int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr); |
652 | 390k | if (tok <= 0) { |
653 | 460 | if (tok == XML_TOK_INVALID) |
654 | 123 | *nextTokPtr = ptr; |
655 | 460 | return tok; |
656 | 460 | } |
657 | 390k | break; |
658 | 390k | } |
659 | 390k | case BT_LT: |
660 | 118 | *nextTokPtr = ptr; |
661 | 118 | return XML_TOK_INVALID; |
662 | 230M | default: |
663 | 230M | ptr += MINBPC(enc); |
664 | 230M | break; |
665 | 242M | } |
666 | 242M | } |
667 | 5.66M | ptr += MINBPC(enc); |
668 | 5.66M | REQUIRE_CHAR(enc, ptr, end); |
669 | 5.66M | switch (BYTE_TYPE(enc, ptr)) { |
670 | 2.89M | case BT_S: |
671 | 4.59M | case BT_CR: |
672 | 4.67M | case BT_LF: |
673 | 4.67M | break; |
674 | 10.2k | case BT_SOL: |
675 | 10.2k | goto sol; |
676 | 978k | case BT_GT: |
677 | 978k | goto gt; |
678 | 164 | default: |
679 | 164 | *nextTokPtr = ptr; |
680 | 164 | return XML_TOK_INVALID; |
681 | 5.66M | } |
682 | | /* ptr points to closing quote */ |
683 | 6.69M | for (;;) { |
684 | 6.69M | ptr += MINBPC(enc); |
685 | 6.69M | REQUIRE_CHAR(enc, ptr, end); |
686 | 6.69M | switch (BYTE_TYPE(enc, ptr)) { |
687 | 4.45M | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
688 | 783k | case BT_S: |
689 | 2.01M | case BT_CR: |
690 | 2.02M | case BT_LF: |
691 | 2.02M | continue; |
692 | 6.22k | case BT_GT: |
693 | 985k | gt: |
694 | 985k | *nextTokPtr = ptr + MINBPC(enc); |
695 | 985k | return XML_TOK_START_TAG_WITH_ATTS; |
696 | 35.1k | case BT_SOL: |
697 | 45.4k | sol: |
698 | 45.4k | ptr += MINBPC(enc); |
699 | 45.4k | REQUIRE_CHAR(enc, ptr, end); |
700 | 45.3k | if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
701 | 95 | *nextTokPtr = ptr; |
702 | 95 | return XML_TOK_INVALID; |
703 | 95 | } |
704 | 45.3k | *nextTokPtr = ptr + MINBPC(enc); |
705 | 45.3k | return XML_TOK_EMPTY_ELEMENT_WITH_ATTS; |
706 | 147 | default: |
707 | 147 | *nextTokPtr = ptr; |
708 | 147 | return XML_TOK_INVALID; |
709 | 6.69M | } |
710 | 4.63M | break; |
711 | 6.69M | } |
712 | 4.63M | break; |
713 | 4.67M | } |
714 | 4.63M | default: |
715 | 497 | *nextTokPtr = ptr; |
716 | 497 | return XML_TOK_INVALID; |
717 | 49.7M | } |
718 | 49.7M | } |
719 | 2.81k | return XML_TOK_PARTIAL; |
720 | 1.04M | } Line | Count | Source | 573 | 1.02M | const char **nextTokPtr) { | 574 | 1.02M | # ifdef XML_NS | 575 | 1.02M | int hadColon = 0; | 576 | 1.02M | # endif | 577 | 46.0M | while (HAS_CHAR(enc, ptr, end)) { | 578 | 46.0M | switch (BYTE_TYPE(enc, ptr)) { | 579 | 128M | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) | 580 | 0 | # ifdef XML_NS | 581 | 196k | case BT_COLON: | 582 | 196k | if (hadColon) { | 583 | 17 | *nextTokPtr = ptr; | 584 | 17 | return XML_TOK_INVALID; | 585 | 17 | } | 586 | 196k | hadColon = 1; | 587 | 196k | ptr += MINBPC(enc); | 588 | 196k | REQUIRE_CHAR(enc, ptr, end); | 589 | 196k | switch (BYTE_TYPE(enc, ptr)) { | 590 | 189k | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) | 591 | 18 | default: | 592 | 18 | *nextTokPtr = ptr; | 593 | 18 | return XML_TOK_INVALID; | 594 | 196k | } | 595 | 196k | break; | 596 | 196k | # endif | 597 | 196k | case BT_S: | 598 | 4.64k | case BT_CR: | 599 | 15.9k | case BT_LF: | 600 | 364k | for (;;) { | 601 | 364k | int t; | 602 | | | 603 | 364k | ptr += MINBPC(enc); | 604 | 364k | REQUIRE_CHAR(enc, ptr, end); | 605 | 364k | t = BYTE_TYPE(enc, ptr); | 606 | 364k | if (t == BT_EQUALS) | 607 | 15.6k | break; | 608 | 348k | switch (t) { | 609 | 285 | case BT_S: | 610 | 50.5k | case BT_LF: | 611 | 348k | case BT_CR: | 612 | 348k | break; | 613 | 119 | default: | 614 | 119 | *nextTokPtr = ptr; | 615 | 119 | return XML_TOK_INVALID; | 616 | 348k | } | 617 | 348k | } | 618 | | /* fall through */ | 619 | 4.84M | case BT_EQUALS: { | 620 | 4.84M | int open; | 621 | 4.84M | # ifdef XML_NS | 622 | 4.84M | hadColon = 0; | 623 | 4.84M | # endif | 624 | 5.35M | for (;;) { | 625 | 5.35M | ptr += MINBPC(enc); | 626 | 5.35M | REQUIRE_CHAR(enc, ptr, end); | 627 | 5.35M | open = BYTE_TYPE(enc, ptr); | 628 | 5.35M | if (open == BT_QUOT || open == BT_APOS) | 629 | 4.84M | break; | 630 | 509k | switch (open) { | 631 | 503k | case BT_S: | 632 | 509k | case BT_LF: | 633 | 509k | case BT_CR: | 634 | 509k | break; | 635 | 69 | default: | 636 | 69 | *nextTokPtr = ptr; | 637 | 69 | return XML_TOK_INVALID; | 638 | 509k | } | 639 | 509k | } | 640 | 4.84M | ptr += MINBPC(enc); | 641 | | /* in attribute value */ | 642 | 127M | for (;;) { | 643 | 127M | int t; | 644 | 127M | REQUIRE_CHAR(enc, ptr, end); | 645 | 127M | t = BYTE_TYPE(enc, ptr); | 646 | 127M | if (t == open) | 647 | 4.84M | break; | 648 | 123M | switch (t) { | 649 | 19.8M | INVALID_CASES(ptr, nextTokPtr) | 650 | 133k | case BT_AMP: { | 651 | 133k | int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr); | 652 | 133k | if (tok <= 0) { | 653 | 247 | if (tok == XML_TOK_INVALID) | 654 | 68 | *nextTokPtr = ptr; | 655 | 247 | return tok; | 656 | 247 | } | 657 | 132k | break; | 658 | 133k | } | 659 | 132k | case BT_LT: | 660 | 78 | *nextTokPtr = ptr; | 661 | 78 | return XML_TOK_INVALID; | 662 | 113M | default: | 663 | 113M | ptr += MINBPC(enc); | 664 | 113M | break; | 665 | 123M | } | 666 | 123M | } | 667 | 4.84M | ptr += MINBPC(enc); | 668 | 4.84M | REQUIRE_CHAR(enc, ptr, end); | 669 | 4.84M | switch (BYTE_TYPE(enc, ptr)) { | 670 | 2.16M | case BT_S: | 671 | 3.85M | case BT_CR: | 672 | 3.86M | case BT_LF: | 673 | 3.86M | break; | 674 | 7.79k | case BT_SOL: | 675 | 7.79k | goto sol; | 676 | 973k | case BT_GT: | 677 | 973k | goto gt; | 678 | 82 | default: | 679 | 82 | *nextTokPtr = ptr; | 680 | 82 | return XML_TOK_INVALID; | 681 | 4.84M | } | 682 | | /* ptr points to closing quote */ | 683 | 5.85M | for (;;) { | 684 | 5.85M | ptr += MINBPC(enc); | 685 | 5.85M | REQUIRE_CHAR(enc, ptr, end); | 686 | 5.85M | switch (BYTE_TYPE(enc, ptr)) { | 687 | 3.79M | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) | 688 | 777k | case BT_S: | 689 | 1.98M | case BT_CR: | 690 | 1.99M | case BT_LF: | 691 | 1.99M | continue; | 692 | 3.79k | case BT_GT: | 693 | 977k | gt: | 694 | 977k | *nextTokPtr = ptr + MINBPC(enc); | 695 | 977k | return XML_TOK_START_TAG_WITH_ATTS; | 696 | 34.3k | case BT_SOL: | 697 | 42.0k | sol: | 698 | 42.0k | ptr += MINBPC(enc); | 699 | 42.0k | REQUIRE_CHAR(enc, ptr, end); | 700 | 42.0k | if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) { | 701 | 24 | *nextTokPtr = ptr; | 702 | 24 | return XML_TOK_INVALID; | 703 | 24 | } | 704 | 42.0k | *nextTokPtr = ptr + MINBPC(enc); | 705 | 42.0k | return XML_TOK_EMPTY_ELEMENT_WITH_ATTS; | 706 | 38 | default: | 707 | 38 | *nextTokPtr = ptr; | 708 | 38 | return XML_TOK_INVALID; | 709 | 5.85M | } | 710 | 3.82M | break; | 711 | 5.85M | } | 712 | 3.82M | break; | 713 | 3.86M | } | 714 | 3.82M | default: | 715 | 339 | *nextTokPtr = ptr; | 716 | 339 | return XML_TOK_INVALID; | 717 | 46.0M | } | 718 | 46.0M | } | 719 | 1.98k | return XML_TOK_PARTIAL; | 720 | 1.02M | } |
xmltok.c:little2_scanAtts Line | Count | Source | 573 | 9.49k | const char **nextTokPtr) { | 574 | 9.49k | # ifdef XML_NS | 575 | 9.49k | int hadColon = 0; | 576 | 9.49k | # endif | 577 | 2.45M | while (HAS_CHAR(enc, ptr, end)) { | 578 | 2.45M | switch (BYTE_TYPE(enc, ptr)) { | 579 | 8.55M | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) | 580 | 0 | # ifdef XML_NS | 581 | 911 | case BT_COLON: | 582 | 911 | if (hadColon) { | 583 | 7 | *nextTokPtr = ptr; | 584 | 7 | return XML_TOK_INVALID; | 585 | 7 | } | 586 | 904 | hadColon = 1; | 587 | 904 | ptr += MINBPC(enc); | 588 | 904 | REQUIRE_CHAR(enc, ptr, end); | 589 | 892 | switch (BYTE_TYPE(enc, ptr)) { | 590 | 778 | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) | 591 | 36 | default: | 592 | 36 | *nextTokPtr = ptr; | 593 | 36 | return XML_TOK_INVALID; | 594 | 892 | } | 595 | 779 | break; | 596 | 779 | # endif | 597 | 779 | case BT_S: | 598 | 2.56k | case BT_CR: | 599 | 8.87k | case BT_LF: | 600 | 26.6k | for (;;) { | 601 | 26.6k | int t; | 602 | | | 603 | 26.6k | ptr += MINBPC(enc); | 604 | 26.6k | REQUIRE_CHAR(enc, ptr, end); | 605 | 26.4k | t = BYTE_TYPE(enc, ptr); | 606 | 26.4k | if (t == BT_EQUALS) | 607 | 8.63k | break; | 608 | 17.8k | switch (t) { | 609 | 3.71k | case BT_S: | 610 | 7.00k | case BT_LF: | 611 | 17.7k | case BT_CR: | 612 | 17.7k | break; | 613 | 72 | default: | 614 | 72 | *nextTokPtr = ptr; | 615 | 72 | return XML_TOK_INVALID; | 616 | 17.8k | } | 617 | 17.8k | } | 618 | | /* fall through */ | 619 | 737k | case BT_EQUALS: { | 620 | 737k | int open; | 621 | 737k | # ifdef XML_NS | 622 | 737k | hadColon = 0; | 623 | 737k | # endif | 624 | 758k | for (;;) { | 625 | 758k | ptr += MINBPC(enc); | 626 | 758k | REQUIRE_CHAR(enc, ptr, end); | 627 | 758k | open = BYTE_TYPE(enc, ptr); | 628 | 758k | if (open == BT_QUOT || open == BT_APOS) | 629 | 737k | break; | 630 | 20.6k | switch (open) { | 631 | 4.67k | case BT_S: | 632 | 8.98k | case BT_LF: | 633 | 20.6k | case BT_CR: | 634 | 20.6k | break; | 635 | 52 | default: | 636 | 52 | *nextTokPtr = ptr; | 637 | 52 | return XML_TOK_INVALID; | 638 | 20.6k | } | 639 | 20.6k | } | 640 | 737k | ptr += MINBPC(enc); | 641 | | /* in attribute value */ | 642 | 40.8M | for (;;) { | 643 | 40.8M | int t; | 644 | 40.8M | REQUIRE_CHAR(enc, ptr, end); | 645 | 40.8M | t = BYTE_TYPE(enc, ptr); | 646 | 40.8M | if (t == open) | 647 | 735k | break; | 648 | 40.1M | switch (t) { | 649 | 1.23M | INVALID_CASES(ptr, nextTokPtr) | 650 | 37.3k | case BT_AMP: { | 651 | 37.3k | int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr); | 652 | 37.3k | if (tok <= 0) { | 653 | 78 | if (tok == XML_TOK_INVALID) | 654 | 12 | *nextTokPtr = ptr; | 655 | 78 | return tok; | 656 | 78 | } | 657 | 37.2k | break; | 658 | 37.3k | } | 659 | 37.2k | case BT_LT: | 660 | 17 | *nextTokPtr = ptr; | 661 | 17 | return XML_TOK_INVALID; | 662 | 39.4M | default: | 663 | 39.4M | ptr += MINBPC(enc); | 664 | 39.4M | break; | 665 | 40.1M | } | 666 | 40.1M | } | 667 | 735k | ptr += MINBPC(enc); | 668 | 735k | REQUIRE_CHAR(enc, ptr, end); | 669 | 735k | switch (BYTE_TYPE(enc, ptr)) { | 670 | 724k | case BT_S: | 671 | 726k | case BT_CR: | 672 | 731k | case BT_LF: | 673 | 731k | break; | 674 | 1.56k | case BT_SOL: | 675 | 1.56k | goto sol; | 676 | 2.51k | case BT_GT: | 677 | 2.51k | goto gt; | 678 | 44 | default: | 679 | 44 | *nextTokPtr = ptr; | 680 | 44 | return XML_TOK_INVALID; | 681 | 735k | } | 682 | | /* ptr points to closing quote */ | 683 | 755k | for (;;) { | 684 | 755k | ptr += MINBPC(enc); | 685 | 755k | REQUIRE_CHAR(enc, ptr, end); | 686 | 755k | switch (BYTE_TYPE(enc, ptr)) { | 687 | 588k | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) | 688 | 885 | case BT_S: | 689 | 24.4k | case BT_CR: | 690 | 24.8k | case BT_LF: | 691 | 24.8k | continue; | 692 | 1.30k | case BT_GT: | 693 | 3.82k | gt: | 694 | 3.82k | *nextTokPtr = ptr + MINBPC(enc); | 695 | 3.82k | return XML_TOK_START_TAG_WITH_ATTS; | 696 | 233 | case BT_SOL: | 697 | 1.79k | sol: | 698 | 1.79k | ptr += MINBPC(enc); | 699 | 1.79k | REQUIRE_CHAR(enc, ptr, end); | 700 | 1.77k | if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) { | 701 | 27 | *nextTokPtr = ptr; | 702 | 27 | return XML_TOK_INVALID; | 703 | 27 | } | 704 | 1.74k | *nextTokPtr = ptr + MINBPC(enc); | 705 | 1.74k | return XML_TOK_EMPTY_ELEMENT_WITH_ATTS; | 706 | 56 | default: | 707 | 56 | *nextTokPtr = ptr; | 708 | 56 | return XML_TOK_INVALID; | 709 | 755k | } | 710 | 729k | break; | 711 | 755k | } | 712 | 729k | break; | 713 | 731k | } | 714 | 729k | default: | 715 | 72 | *nextTokPtr = ptr; | 716 | 72 | return XML_TOK_INVALID; | 717 | 2.45M | } | 718 | 2.45M | } | 719 | 390 | return XML_TOK_PARTIAL; | 720 | 9.49k | } |
Line | Count | Source | 573 | 10.2k | const char **nextTokPtr) { | 574 | 10.2k | # ifdef XML_NS | 575 | 10.2k | int hadColon = 0; | 576 | 10.2k | # endif | 577 | 1.25M | while (HAS_CHAR(enc, ptr, end)) { | 578 | 1.25M | switch (BYTE_TYPE(enc, ptr)) { | 579 | 5.76M | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) | 580 | 0 | # ifdef XML_NS | 581 | 2.51k | case BT_COLON: | 582 | 2.51k | if (hadColon) { | 583 | 7 | *nextTokPtr = ptr; | 584 | 7 | return XML_TOK_INVALID; | 585 | 7 | } | 586 | 2.51k | hadColon = 1; | 587 | 2.51k | ptr += MINBPC(enc); | 588 | 2.51k | REQUIRE_CHAR(enc, ptr, end); | 589 | 2.49k | switch (BYTE_TYPE(enc, ptr)) { | 590 | 4.39k | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) | 591 | 39 | default: | 592 | 39 | *nextTokPtr = ptr; | 593 | 39 | return XML_TOK_INVALID; | 594 | 2.49k | } | 595 | 2.37k | break; | 596 | 2.37k | # endif | 597 | 2.37k | case BT_S: | 598 | 3.11k | case BT_CR: | 599 | 9.89k | case BT_LF: | 600 | 241k | for (;;) { | 601 | 241k | int t; | 602 | | | 603 | 241k | ptr += MINBPC(enc); | 604 | 241k | REQUIRE_CHAR(enc, ptr, end); | 605 | 241k | t = BYTE_TYPE(enc, ptr); | 606 | 241k | if (t == BT_EQUALS) | 607 | 9.71k | break; | 608 | 231k | switch (t) { | 609 | 119k | case BT_S: | 610 | 227k | case BT_LF: | 611 | 231k | case BT_CR: | 612 | 231k | break; | 613 | 53 | default: | 614 | 53 | *nextTokPtr = ptr; | 615 | 53 | return XML_TOK_INVALID; | 616 | 231k | } | 617 | 231k | } | 618 | | /* fall through */ | 619 | 89.0k | case BT_EQUALS: { | 620 | 89.0k | int open; | 621 | 89.0k | # ifdef XML_NS | 622 | 89.0k | hadColon = 0; | 623 | 89.0k | # endif | 624 | 102k | for (;;) { | 625 | 102k | ptr += MINBPC(enc); | 626 | 102k | REQUIRE_CHAR(enc, ptr, end); | 627 | 102k | open = BYTE_TYPE(enc, ptr); | 628 | 102k | if (open == BT_QUOT || open == BT_APOS) | 629 | 88.8k | break; | 630 | 13.4k | switch (open) { | 631 | 2.91k | case BT_S: | 632 | 5.93k | case BT_LF: | 633 | 13.4k | case BT_CR: | 634 | 13.4k | break; | 635 | 50 | default: | 636 | 50 | *nextTokPtr = ptr; | 637 | 50 | return XML_TOK_INVALID; | 638 | 13.4k | } | 639 | 13.4k | } | 640 | 88.8k | ptr += MINBPC(enc); | 641 | | /* in attribute value */ | 642 | 79.0M | for (;;) { | 643 | 79.0M | int t; | 644 | 79.0M | REQUIRE_CHAR(enc, ptr, end); | 645 | 79.0M | t = BYTE_TYPE(enc, ptr); | 646 | 79.0M | if (t == open) | 647 | 85.6k | break; | 648 | 78.9M | switch (t) { | 649 | 1.00M | INVALID_CASES(ptr, nextTokPtr) | 650 | 219k | case BT_AMP: { | 651 | 219k | int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr); | 652 | 219k | if (tok <= 0) { | 653 | 135 | if (tok == XML_TOK_INVALID) | 654 | 43 | *nextTokPtr = ptr; | 655 | 135 | return tok; | 656 | 135 | } | 657 | 219k | break; | 658 | 219k | } | 659 | 219k | case BT_LT: | 660 | 23 | *nextTokPtr = ptr; | 661 | 23 | return XML_TOK_INVALID; | 662 | 78.2M | default: | 663 | 78.2M | ptr += MINBPC(enc); | 664 | 78.2M | break; | 665 | 78.9M | } | 666 | 78.9M | } | 667 | 85.6k | ptr += MINBPC(enc); | 668 | 85.6k | REQUIRE_CHAR(enc, ptr, end); | 669 | 85.6k | switch (BYTE_TYPE(enc, ptr)) { | 670 | 11.1k | case BT_S: | 671 | 15.2k | case BT_CR: | 672 | 81.8k | case BT_LF: | 673 | 81.8k | break; | 674 | 937 | case BT_SOL: | 675 | 937 | goto sol; | 676 | 2.78k | case BT_GT: | 677 | 2.78k | goto gt; | 678 | 38 | default: | 679 | 38 | *nextTokPtr = ptr; | 680 | 38 | return XML_TOK_INVALID; | 681 | 85.6k | } | 682 | | /* ptr points to closing quote */ | 683 | 90.0k | for (;;) { | 684 | 90.0k | ptr += MINBPC(enc); | 685 | 90.0k | REQUIRE_CHAR(enc, ptr, end); | 686 | 89.9k | switch (BYTE_TYPE(enc, ptr)) { | 687 | 70.6k | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) | 688 | 5.12k | case BT_S: | 689 | 7.36k | case BT_CR: | 690 | 8.17k | case BT_LF: | 691 | 8.17k | continue; | 692 | 1.12k | case BT_GT: | 693 | 3.90k | gt: | 694 | 3.90k | *nextTokPtr = ptr + MINBPC(enc); | 695 | 3.90k | return XML_TOK_START_TAG_WITH_ATTS; | 696 | 646 | case BT_SOL: | 697 | 1.58k | sol: | 698 | 1.58k | ptr += MINBPC(enc); | 699 | 1.58k | REQUIRE_CHAR(enc, ptr, end); | 700 | 1.56k | if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) { | 701 | 44 | *nextTokPtr = ptr; | 702 | 44 | return XML_TOK_INVALID; | 703 | 44 | } | 704 | 1.52k | *nextTokPtr = ptr + MINBPC(enc); | 705 | 1.52k | return XML_TOK_EMPTY_ELEMENT_WITH_ATTS; | 706 | 53 | default: | 707 | 53 | *nextTokPtr = ptr; | 708 | 53 | return XML_TOK_INVALID; | 709 | 89.9k | } | 710 | 79.8k | break; | 711 | 89.9k | } | 712 | 79.8k | break; | 713 | 81.8k | } | 714 | 79.8k | default: | 715 | 86 | *nextTokPtr = ptr; | 716 | 86 | return XML_TOK_INVALID; | 717 | 1.25M | } | 718 | 1.25M | } | 719 | 443 | return XML_TOK_PARTIAL; | 720 | 10.2k | } |
|
721 | | |
722 | | /* ptr points to character following "<" */ |
723 | | |
724 | | static int PTRCALL |
725 | | PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, |
726 | 24.7M | const char **nextTokPtr) { |
727 | 24.7M | # ifdef XML_NS |
728 | 24.7M | int hadColon; |
729 | 24.7M | # endif |
730 | 24.7M | REQUIRE_CHAR(enc, ptr, end); |
731 | 24.7M | switch (BYTE_TYPE(enc, ptr)) { |
732 | 8.23M | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
733 | 2.14M | case BT_EXCL: |
734 | 2.14M | ptr += MINBPC(enc); |
735 | 2.14M | REQUIRE_CHAR(enc, ptr, end); |
736 | 2.14M | switch (BYTE_TYPE(enc, ptr)) { |
737 | 714k | case BT_MINUS: |
738 | 714k | return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
739 | 1.43M | case BT_LSQB: |
740 | 1.43M | return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
741 | 2.14M | } |
742 | 221 | *nextTokPtr = ptr; |
743 | 221 | return XML_TOK_INVALID; |
744 | 441k | case BT_QUEST: |
745 | 441k | return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
746 | 199k | case BT_SOL: |
747 | 199k | return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
748 | 722 | default: |
749 | 722 | *nextTokPtr = ptr; |
750 | 722 | return XML_TOK_INVALID; |
751 | 24.7M | } |
752 | 21.9M | # ifdef XML_NS |
753 | 21.9M | hadColon = 0; |
754 | 21.9M | # endif |
755 | | /* we have a start-tag */ |
756 | 133M | while (HAS_CHAR(enc, ptr, end)) { |
757 | 133M | switch (BYTE_TYPE(enc, ptr)) { |
758 | 386M | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
759 | 0 | # ifdef XML_NS |
760 | 25.5k | case BT_COLON: |
761 | 25.5k | if (hadColon) { |
762 | 35 | *nextTokPtr = ptr; |
763 | 35 | return XML_TOK_INVALID; |
764 | 35 | } |
765 | 25.4k | hadColon = 1; |
766 | 25.4k | ptr += MINBPC(enc); |
767 | 25.4k | REQUIRE_CHAR(enc, ptr, end); |
768 | 25.4k | switch (BYTE_TYPE(enc, ptr)) { |
769 | 19.6k | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
770 | 188 | default: |
771 | 188 | *nextTokPtr = ptr; |
772 | 188 | return XML_TOK_INVALID; |
773 | 25.4k | } |
774 | 24.9k | break; |
775 | 24.9k | # endif |
776 | 3.46M | case BT_S: |
777 | 3.59M | case BT_CR: |
778 | 3.66M | case BT_LF: { |
779 | 3.66M | ptr += MINBPC(enc); |
780 | 4.39M | while (HAS_CHAR(enc, ptr, end)) { |
781 | 4.39M | switch (BYTE_TYPE(enc, ptr)) { |
782 | 1.16M | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
783 | 80.0k | case BT_GT: |
784 | 80.0k | goto gt; |
785 | 2.54M | case BT_SOL: |
786 | 2.54M | goto sol; |
787 | 71.7k | case BT_S: |
788 | 596k | case BT_CR: |
789 | 726k | case BT_LF: |
790 | 726k | ptr += MINBPC(enc); |
791 | 726k | continue; |
792 | 235 | default: |
793 | 235 | *nextTokPtr = ptr; |
794 | 235 | return XML_TOK_INVALID; |
795 | 4.39M | } |
796 | 1.04M | return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr); |
797 | 4.39M | } |
798 | 737 | return XML_TOK_PARTIAL; |
799 | 3.66M | } |
800 | 18.2M | case BT_GT: |
801 | 18.2M | gt: |
802 | 18.2M | *nextTokPtr = ptr + MINBPC(enc); |
803 | 18.2M | return XML_TOK_START_TAG_NO_ATTS; |
804 | 52.8k | case BT_SOL: |
805 | 2.59M | sol: |
806 | 2.59M | ptr += MINBPC(enc); |
807 | 2.59M | REQUIRE_CHAR(enc, ptr, end); |
808 | 2.59M | if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
809 | 185 | *nextTokPtr = ptr; |
810 | 185 | return XML_TOK_INVALID; |
811 | 185 | } |
812 | 2.59M | *nextTokPtr = ptr + MINBPC(enc); |
813 | 2.59M | return XML_TOK_EMPTY_ELEMENT_NO_ATTS; |
814 | 851 | default: |
815 | 851 | *nextTokPtr = ptr; |
816 | 851 | return XML_TOK_INVALID; |
817 | 133M | } |
818 | 133M | } |
819 | 3.65k | return XML_TOK_PARTIAL; |
820 | 21.9M | } Line | Count | Source | 726 | 23.9M | const char **nextTokPtr) { | 727 | 23.9M | # ifdef XML_NS | 728 | 23.9M | int hadColon; | 729 | 23.9M | # endif | 730 | 23.9M | REQUIRE_CHAR(enc, ptr, end); | 731 | 23.9M | switch (BYTE_TYPE(enc, ptr)) { | 732 | 7.91M | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) | 733 | 2.09M | case BT_EXCL: | 734 | 2.09M | ptr += MINBPC(enc); | 735 | 2.09M | REQUIRE_CHAR(enc, ptr, end); | 736 | 2.09M | switch (BYTE_TYPE(enc, ptr)) { | 737 | 678k | case BT_MINUS: | 738 | 678k | return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 739 | 1.42M | case BT_LSQB: | 740 | 1.42M | return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 741 | 2.09M | } | 742 | 154 | *nextTokPtr = ptr; | 743 | 154 | return XML_TOK_INVALID; | 744 | 105k | case BT_QUEST: | 745 | 105k | return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 746 | 155k | case BT_SOL: | 747 | 155k | return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 748 | 558 | default: | 749 | 558 | *nextTokPtr = ptr; | 750 | 558 | return XML_TOK_INVALID; | 751 | 23.9M | } | 752 | 21.5M | # ifdef XML_NS | 753 | 21.5M | hadColon = 0; | 754 | 21.5M | # endif | 755 | | /* we have a start-tag */ | 756 | 131M | while (HAS_CHAR(enc, ptr, end)) { | 757 | 131M | switch (BYTE_TYPE(enc, ptr)) { | 758 | 378M | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) | 759 | 0 | # ifdef XML_NS | 760 | 21.9k | case BT_COLON: | 761 | 21.9k | if (hadColon) { | 762 | 18 | *nextTokPtr = ptr; | 763 | 18 | return XML_TOK_INVALID; | 764 | 18 | } | 765 | 21.9k | hadColon = 1; | 766 | 21.9k | ptr += MINBPC(enc); | 767 | 21.9k | REQUIRE_CHAR(enc, ptr, end); | 768 | 21.9k | switch (BYTE_TYPE(enc, ptr)) { | 769 | 16.3k | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) | 770 | 78 | default: | 771 | 78 | *nextTokPtr = ptr; | 772 | 78 | return XML_TOK_INVALID; | 773 | 21.9k | } | 774 | 21.7k | break; | 775 | 21.7k | # endif | 776 | 3.45M | case BT_S: | 777 | 3.53M | case BT_CR: | 778 | 3.60M | case BT_LF: { | 779 | 3.60M | ptr += MINBPC(enc); | 780 | 3.97M | while (HAS_CHAR(enc, ptr, end)) { | 781 | 3.97M | switch (BYTE_TYPE(enc, ptr)) { | 782 | 1.14M | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) | 783 | 37.4k | case BT_GT: | 784 | 37.4k | goto gt; | 785 | 2.53M | case BT_SOL: | 786 | 2.53M | goto sol; | 787 | 4.19k | case BT_S: | 788 | 300k | case BT_CR: | 789 | 366k | case BT_LF: | 790 | 366k | ptr += MINBPC(enc); | 791 | 366k | continue; | 792 | 102 | default: | 793 | 102 | *nextTokPtr = ptr; | 794 | 102 | return XML_TOK_INVALID; | 795 | 3.97M | } | 796 | 1.02M | return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr); | 797 | 3.97M | } | 798 | 351 | return XML_TOK_PARTIAL; | 799 | 3.60M | } | 800 | 17.8M | case BT_GT: | 801 | 17.9M | gt: | 802 | 17.9M | *nextTokPtr = ptr + MINBPC(enc); | 803 | 17.9M | return XML_TOK_START_TAG_NO_ATTS; | 804 | 51.5k | case BT_SOL: | 805 | 2.59M | sol: | 806 | 2.59M | ptr += MINBPC(enc); | 807 | 2.59M | REQUIRE_CHAR(enc, ptr, end); | 808 | 2.59M | if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) { | 809 | 84 | *nextTokPtr = ptr; | 810 | 84 | return XML_TOK_INVALID; | 811 | 84 | } | 812 | 2.59M | *nextTokPtr = ptr + MINBPC(enc); | 813 | 2.59M | return XML_TOK_EMPTY_ELEMENT_NO_ATTS; | 814 | 698 | default: | 815 | 698 | *nextTokPtr = ptr; | 816 | 698 | return XML_TOK_INVALID; | 817 | 131M | } | 818 | 131M | } | 819 | 2.54k | return XML_TOK_PARTIAL; | 820 | 21.5M | } |
Line | Count | Source | 726 | 370k | const char **nextTokPtr) { | 727 | 370k | # ifdef XML_NS | 728 | 370k | int hadColon; | 729 | 370k | # endif | 730 | 370k | REQUIRE_CHAR(enc, ptr, end); | 731 | 370k | switch (BYTE_TYPE(enc, ptr)) { | 732 | 243k | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) | 733 | 36.3k | case BT_EXCL: | 734 | 36.3k | ptr += MINBPC(enc); | 735 | 36.3k | REQUIRE_CHAR(enc, ptr, end); | 736 | 36.3k | switch (BYTE_TYPE(enc, ptr)) { | 737 | 34.7k | case BT_MINUS: | 738 | 34.7k | return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 739 | 1.51k | case BT_LSQB: | 740 | 1.51k | return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 741 | 36.3k | } | 742 | 38 | *nextTokPtr = ptr; | 743 | 38 | return XML_TOK_INVALID; | 744 | 106k | case BT_QUEST: | 745 | 106k | return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 746 | 2.03k | case BT_SOL: | 747 | 2.03k | return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 748 | 75 | default: | 749 | 75 | *nextTokPtr = ptr; | 750 | 75 | return XML_TOK_INVALID; | 751 | 370k | } | 752 | 225k | # ifdef XML_NS | 753 | 225k | hadColon = 0; | 754 | 225k | # endif | 755 | | /* we have a start-tag */ | 756 | 1.35M | while (HAS_CHAR(enc, ptr, end)) { | 757 | 1.35M | switch (BYTE_TYPE(enc, ptr)) { | 758 | 5.63M | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) | 759 | 0 | # ifdef XML_NS | 760 | 2.54k | case BT_COLON: | 761 | 2.54k | if (hadColon) { | 762 | 9 | *nextTokPtr = ptr; | 763 | 9 | return XML_TOK_INVALID; | 764 | 9 | } | 765 | 2.53k | hadColon = 1; | 766 | 2.53k | ptr += MINBPC(enc); | 767 | 2.53k | REQUIRE_CHAR(enc, ptr, end); | 768 | 2.51k | switch (BYTE_TYPE(enc, ptr)) { | 769 | 2.40k | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) | 770 | 63 | default: | 771 | 63 | *nextTokPtr = ptr; | 772 | 63 | return XML_TOK_INVALID; | 773 | 2.51k | } | 774 | 2.36k | break; | 775 | 2.36k | # endif | 776 | 6.03k | case BT_S: | 777 | 9.03k | case BT_CR: | 778 | 11.6k | case BT_LF: { | 779 | 11.6k | ptr += MINBPC(enc); | 780 | 116k | while (HAS_CHAR(enc, ptr, end)) { | 781 | 116k | switch (BYTE_TYPE(enc, ptr)) { | 782 | 10.9k | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) | 783 | 1.58k | case BT_GT: | 784 | 1.58k | goto gt; | 785 | 96 | case BT_SOL: | 786 | 96 | goto sol; | 787 | 42.8k | case BT_S: | 788 | 64.5k | case BT_CR: | 789 | 105k | case BT_LF: | 790 | 105k | ptr += MINBPC(enc); | 791 | 105k | continue; | 792 | 69 | default: | 793 | 69 | *nextTokPtr = ptr; | 794 | 69 | return XML_TOK_INVALID; | 795 | 116k | } | 796 | 9.49k | return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr); | 797 | 116k | } | 798 | 200 | return XML_TOK_PARTIAL; | 799 | 11.6k | } | 800 | 211k | case BT_GT: | 801 | 213k | gt: | 802 | 213k | *nextTokPtr = ptr + MINBPC(enc); | 803 | 213k | return XML_TOK_START_TAG_NO_ATTS; | 804 | 808 | case BT_SOL: | 805 | 904 | sol: | 806 | 904 | ptr += MINBPC(enc); | 807 | 904 | REQUIRE_CHAR(enc, ptr, end); | 808 | 878 | if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) { | 809 | 57 | *nextTokPtr = ptr; | 810 | 57 | return XML_TOK_INVALID; | 811 | 57 | } | 812 | 821 | *nextTokPtr = ptr + MINBPC(enc); | 813 | 821 | return XML_TOK_EMPTY_ELEMENT_NO_ATTS; | 814 | 83 | default: | 815 | 83 | *nextTokPtr = ptr; | 816 | 83 | return XML_TOK_INVALID; | 817 | 1.35M | } | 818 | 1.35M | } | 819 | 624 | return XML_TOK_PARTIAL; | 820 | 225k | } |
Line | Count | Source | 726 | 436k | const char **nextTokPtr) { | 727 | 436k | # ifdef XML_NS | 728 | 436k | int hadColon; | 729 | 436k | # endif | 730 | 436k | REQUIRE_CHAR(enc, ptr, end); | 731 | 436k | switch (BYTE_TYPE(enc, ptr)) { | 732 | 75.5k | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) | 733 | 10.6k | case BT_EXCL: | 734 | 10.6k | ptr += MINBPC(enc); | 735 | 10.6k | REQUIRE_CHAR(enc, ptr, end); | 736 | 10.6k | switch (BYTE_TYPE(enc, ptr)) { | 737 | 1.41k | case BT_MINUS: | 738 | 1.41k | return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 739 | 9.20k | case BT_LSQB: | 740 | 9.20k | return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 741 | 10.6k | } | 742 | 29 | *nextTokPtr = ptr; | 743 | 29 | return XML_TOK_INVALID; | 744 | 229k | case BT_QUEST: | 745 | 229k | return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 746 | 42.1k | case BT_SOL: | 747 | 42.1k | return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 748 | 89 | default: | 749 | 89 | *nextTokPtr = ptr; | 750 | 89 | return XML_TOK_INVALID; | 751 | 436k | } | 752 | 153k | # ifdef XML_NS | 753 | 153k | hadColon = 0; | 754 | 153k | # endif | 755 | | /* we have a start-tag */ | 756 | 473k | while (HAS_CHAR(enc, ptr, end)) { | 757 | 473k | switch (BYTE_TYPE(enc, ptr)) { | 758 | 1.58M | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) | 759 | 0 | # ifdef XML_NS | 760 | 1.01k | case BT_COLON: | 761 | 1.01k | if (hadColon) { | 762 | 8 | *nextTokPtr = ptr; | 763 | 8 | return XML_TOK_INVALID; | 764 | 8 | } | 765 | 1.01k | hadColon = 1; | 766 | 1.01k | ptr += MINBPC(enc); | 767 | 1.01k | REQUIRE_CHAR(enc, ptr, end); | 768 | 997 | switch (BYTE_TYPE(enc, ptr)) { | 769 | 864 | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) | 770 | 47 | default: | 771 | 47 | *nextTokPtr = ptr; | 772 | 47 | return XML_TOK_INVALID; | 773 | 997 | } | 774 | 843 | break; | 775 | 843 | # endif | 776 | 5.27k | case BT_S: | 777 | 50.7k | case BT_CR: | 778 | 52.1k | case BT_LF: { | 779 | 52.1k | ptr += MINBPC(enc); | 780 | 306k | while (HAS_CHAR(enc, ptr, end)) { | 781 | 306k | switch (BYTE_TYPE(enc, ptr)) { | 782 | 8.28k | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) | 783 | 41.0k | case BT_GT: | 784 | 41.0k | goto gt; | 785 | 470 | case BT_SOL: | 786 | 470 | goto sol; | 787 | 24.6k | case BT_S: | 788 | 231k | case BT_CR: | 789 | 254k | case BT_LF: | 790 | 254k | ptr += MINBPC(enc); | 791 | 254k | continue; | 792 | 64 | default: | 793 | 64 | *nextTokPtr = ptr; | 794 | 64 | return XML_TOK_INVALID; | 795 | 306k | } | 796 | 10.2k | return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr); | 797 | 306k | } | 798 | 186 | return XML_TOK_PARTIAL; | 799 | 52.1k | } | 800 | 99.5k | case BT_GT: | 801 | 140k | gt: | 802 | 140k | *nextTokPtr = ptr + MINBPC(enc); | 803 | 140k | return XML_TOK_START_TAG_NO_ATTS; | 804 | 487 | case BT_SOL: | 805 | 957 | sol: | 806 | 957 | ptr += MINBPC(enc); | 807 | 957 | REQUIRE_CHAR(enc, ptr, end); | 808 | 911 | if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) { | 809 | 44 | *nextTokPtr = ptr; | 810 | 44 | return XML_TOK_INVALID; | 811 | 44 | } | 812 | 867 | *nextTokPtr = ptr + MINBPC(enc); | 813 | 867 | return XML_TOK_EMPTY_ELEMENT_NO_ATTS; | 814 | 70 | default: | 815 | 70 | *nextTokPtr = ptr; | 816 | 70 | return XML_TOK_INVALID; | 817 | 473k | } | 818 | 473k | } | 819 | 483 | return XML_TOK_PARTIAL; | 820 | 153k | } |
|
821 | | |
822 | | static int PTRCALL |
823 | | PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, |
824 | 45.8M | const char **nextTokPtr) { |
825 | 45.8M | if (ptr >= end) |
826 | 1.13M | return XML_TOK_NONE; |
827 | 44.6M | if (MINBPC(enc) > 1) { |
828 | 2.58M | size_t n = end - ptr; |
829 | 2.58M | if (n & (MINBPC(enc) - 1)) { |
830 | 103k | n &= ~(MINBPC(enc) - 1); |
831 | 103k | if (n == 0) |
832 | 1.89k | return XML_TOK_PARTIAL; |
833 | 101k | end = ptr + n; |
834 | 101k | } |
835 | 2.58M | } |
836 | 44.6M | switch (BYTE_TYPE(enc, ptr)) { |
837 | 24.7M | case BT_LT: |
838 | 24.7M | return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
839 | 4.08M | case BT_AMP: |
840 | 4.08M | return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
841 | 2.14M | case BT_CR: |
842 | 2.14M | ptr += MINBPC(enc); |
843 | 2.14M | if (! HAS_CHAR(enc, ptr, end)) |
844 | 1.06k | return XML_TOK_TRAILING_CR; |
845 | 2.14M | if (BYTE_TYPE(enc, ptr) == BT_LF) |
846 | 28.4k | ptr += MINBPC(enc); |
847 | 2.14M | *nextTokPtr = ptr; |
848 | 2.14M | return XML_TOK_DATA_NEWLINE; |
849 | 3.58M | case BT_LF: |
850 | 3.58M | *nextTokPtr = ptr + MINBPC(enc); |
851 | 3.58M | return XML_TOK_DATA_NEWLINE; |
852 | 64.7k | case BT_RSQB: |
853 | 64.7k | ptr += MINBPC(enc); |
854 | 64.7k | if (! HAS_CHAR(enc, ptr, end)) |
855 | 2.65k | return XML_TOK_TRAILING_RSQB; |
856 | 62.0k | if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB)) |
857 | 42.0k | break; |
858 | 19.9k | ptr += MINBPC(enc); |
859 | 19.9k | if (! HAS_CHAR(enc, ptr, end)) |
860 | 3.98k | return XML_TOK_TRAILING_RSQB; |
861 | 15.9k | if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
862 | 15.9k | ptr -= MINBPC(enc); |
863 | 15.9k | break; |
864 | 15.9k | } |
865 | 24 | *nextTokPtr = ptr; |
866 | 24 | return XML_TOK_INVALID; |
867 | 2.56M | INVALID_CASES(ptr, nextTokPtr) |
868 | 8.79M | default: |
869 | 8.79M | ptr += MINBPC(enc); |
870 | 8.79M | break; |
871 | 44.6M | } |
872 | 1.09G | while (HAS_CHAR(enc, ptr, end)) { |
873 | 1.09G | switch (BYTE_TYPE(enc, ptr)) { |
874 | 0 | # define LEAD_CASE(n) \ |
875 | 351M | case BT_LEAD##n: \ |
876 | 351M | if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ |
877 | 1.78k | *nextTokPtr = ptr; \ |
878 | 1.78k | return XML_TOK_DATA_CHARS; \ |
879 | 1.78k | } \ |
880 | 351M | ptr += n; \ |
881 | 351M | break; |
882 | 231M | LEAD_CASE(2) |
883 | 118M | LEAD_CASE(3) |
884 | 2.31M | LEAD_CASE(4) |
885 | 0 | # undef LEAD_CASE |
886 | 1.56M | case BT_RSQB: |
887 | 1.56M | if (HAS_CHARS(enc, ptr, end, 2)) { |
888 | 1.56M | if (! CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) { |
889 | 417k | ptr += MINBPC(enc); |
890 | 417k | break; |
891 | 417k | } |
892 | 1.14M | if (HAS_CHARS(enc, ptr, end, 3)) { |
893 | 1.14M | if (! CHAR_MATCHES(enc, ptr + 2 * MINBPC(enc), ASCII_GT)) { |
894 | 1.14M | ptr += MINBPC(enc); |
895 | 1.14M | break; |
896 | 1.14M | } |
897 | 39 | *nextTokPtr = ptr + 2 * MINBPC(enc); |
898 | 39 | return XML_TOK_INVALID; |
899 | 1.14M | } |
900 | 1.14M | } |
901 | | /* fall through */ |
902 | 1.06M | case BT_AMP: |
903 | 6.04M | case BT_LT: |
904 | 6.04M | case BT_NONXML: |
905 | 6.04M | case BT_MALFORM: |
906 | 6.04M | case BT_TRAIL: |
907 | 6.38M | case BT_CR: |
908 | 8.21M | case BT_LF: |
909 | 8.21M | *nextTokPtr = ptr; |
910 | 8.21M | return XML_TOK_DATA_CHARS; |
911 | 736M | default: |
912 | 736M | ptr += MINBPC(enc); |
913 | 736M | break; |
914 | 1.09G | } |
915 | 1.09G | } |
916 | 1.91M | *nextTokPtr = ptr; |
917 | 1.91M | return XML_TOK_DATA_CHARS; |
918 | 10.1M | } xmltok.c:normal_contentTok Line | Count | Source | 824 | 43.2M | const char **nextTokPtr) { | 825 | 43.2M | if (ptr >= end) | 826 | 1.12M | return XML_TOK_NONE; | 827 | 42.0M | if (MINBPC(enc) > 1) { | 828 | 0 | size_t n = end - ptr; | 829 | 0 | if (n & (MINBPC(enc) - 1)) { | 830 | 0 | n &= ~(MINBPC(enc) - 1); | 831 | 0 | if (n == 0) | 832 | 0 | return XML_TOK_PARTIAL; | 833 | 0 | end = ptr + n; | 834 | 0 | } | 835 | 0 | } | 836 | 42.0M | switch (BYTE_TYPE(enc, ptr)) { | 837 | 23.9M | case BT_LT: | 838 | 23.9M | return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 839 | 3.89M | case BT_AMP: | 840 | 3.89M | return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 841 | 1.29M | case BT_CR: | 842 | 1.29M | ptr += MINBPC(enc); | 843 | 1.29M | if (! HAS_CHAR(enc, ptr, end)) | 844 | 464 | return XML_TOK_TRAILING_CR; | 845 | 1.29M | if (BYTE_TYPE(enc, ptr) == BT_LF) | 846 | 346 | ptr += MINBPC(enc); | 847 | 1.29M | *nextTokPtr = ptr; | 848 | 1.29M | return XML_TOK_DATA_NEWLINE; | 849 | 3.46M | case BT_LF: | 850 | 3.46M | *nextTokPtr = ptr + MINBPC(enc); | 851 | 3.46M | return XML_TOK_DATA_NEWLINE; | 852 | 50.8k | case BT_RSQB: | 853 | 50.8k | ptr += MINBPC(enc); | 854 | 50.8k | if (! HAS_CHAR(enc, ptr, end)) | 855 | 2.29k | return XML_TOK_TRAILING_RSQB; | 856 | 48.5k | if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB)) | 857 | 33.2k | break; | 858 | 15.3k | ptr += MINBPC(enc); | 859 | 15.3k | if (! HAS_CHAR(enc, ptr, end)) | 860 | 3.73k | return XML_TOK_TRAILING_RSQB; | 861 | 11.6k | if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) { | 862 | 11.5k | ptr -= MINBPC(enc); | 863 | 11.5k | break; | 864 | 11.5k | } | 865 | 12 | *nextTokPtr = ptr; | 866 | 12 | return XML_TOK_INVALID; | 867 | 2.53M | INVALID_CASES(ptr, nextTokPtr) | 868 | 8.20M | default: | 869 | 8.20M | ptr += MINBPC(enc); | 870 | 8.20M | break; | 871 | 42.0M | } | 872 | 1.08G | while (HAS_CHAR(enc, ptr, end)) { | 873 | 1.08G | switch (BYTE_TYPE(enc, ptr)) { | 874 | 0 | # define LEAD_CASE(n) \ | 875 | 0 | case BT_LEAD##n: \ | 876 | 0 | if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ | 877 | 0 | *nextTokPtr = ptr; \ | 878 | 0 | return XML_TOK_DATA_CHARS; \ | 879 | 0 | } \ | 880 | 0 | ptr += n; \ | 881 | 0 | break; | 882 | 231M | LEAD_CASE(2) | 883 | 118M | LEAD_CASE(3) | 884 | 949k | LEAD_CASE(4) | 885 | 0 | # undef LEAD_CASE | 886 | 1.19M | case BT_RSQB: | 887 | 1.19M | if (HAS_CHARS(enc, ptr, end, 2)) { | 888 | 1.18M | if (! CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) { | 889 | 225k | ptr += MINBPC(enc); | 890 | 225k | break; | 891 | 225k | } | 892 | 962k | if (HAS_CHARS(enc, ptr, end, 3)) { | 893 | 958k | if (! CHAR_MATCHES(enc, ptr + 2 * MINBPC(enc), ASCII_GT)) { | 894 | 958k | ptr += MINBPC(enc); | 895 | 958k | break; | 896 | 958k | } | 897 | 22 | *nextTokPtr = ptr + 2 * MINBPC(enc); | 898 | 22 | return XML_TOK_INVALID; | 899 | 958k | } | 900 | 962k | } | 901 | | /* fall through */ | 902 | 905k | case BT_AMP: | 903 | 5.62M | case BT_LT: | 904 | 5.62M | case BT_NONXML: | 905 | 5.62M | case BT_MALFORM: | 906 | 5.62M | case BT_TRAIL: | 907 | 5.85M | case BT_CR: | 908 | 7.61M | case BT_LF: | 909 | 7.61M | *nextTokPtr = ptr; | 910 | 7.61M | return XML_TOK_DATA_CHARS; | 911 | 723M | default: | 912 | 723M | ptr += MINBPC(enc); | 913 | 723M | break; | 914 | 1.08G | } | 915 | 1.08G | } | 916 | 1.90M | *nextTokPtr = ptr; | 917 | 1.90M | return XML_TOK_DATA_CHARS; | 918 | 9.51M | } |
xmltok.c:little2_contentTok Line | Count | Source | 824 | 1.22M | const char **nextTokPtr) { | 825 | 1.22M | if (ptr >= end) | 826 | 4.80k | return XML_TOK_NONE; | 827 | 1.22M | if (MINBPC(enc) > 1) { | 828 | 1.22M | size_t n = end - ptr; | 829 | 1.22M | if (n & (MINBPC(enc) - 1)) { | 830 | 57.9k | n &= ~(MINBPC(enc) - 1); | 831 | 57.9k | if (n == 0) | 832 | 896 | return XML_TOK_PARTIAL; | 833 | 57.0k | end = ptr + n; | 834 | 57.0k | } | 835 | 1.22M | } | 836 | 1.22M | switch (BYTE_TYPE(enc, ptr)) { | 837 | 370k | case BT_LT: | 838 | 370k | return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 839 | 40.9k | case BT_AMP: | 840 | 40.9k | return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 841 | 400k | case BT_CR: | 842 | 400k | ptr += MINBPC(enc); | 843 | 400k | if (! HAS_CHAR(enc, ptr, end)) | 844 | 290 | return XML_TOK_TRAILING_CR; | 845 | 400k | if (BYTE_TYPE(enc, ptr) == BT_LF) | 846 | 14.4k | ptr += MINBPC(enc); | 847 | 400k | *nextTokPtr = ptr; | 848 | 400k | return XML_TOK_DATA_NEWLINE; | 849 | 97.2k | case BT_LF: | 850 | 97.2k | *nextTokPtr = ptr + MINBPC(enc); | 851 | 97.2k | return XML_TOK_DATA_NEWLINE; | 852 | 6.76k | case BT_RSQB: | 853 | 6.76k | ptr += MINBPC(enc); | 854 | 6.76k | if (! HAS_CHAR(enc, ptr, end)) | 855 | 183 | return XML_TOK_TRAILING_RSQB; | 856 | 6.58k | if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB)) | 857 | 4.06k | break; | 858 | 2.51k | ptr += MINBPC(enc); | 859 | 2.51k | if (! HAS_CHAR(enc, ptr, end)) | 860 | 140 | return XML_TOK_TRAILING_RSQB; | 861 | 2.37k | if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) { | 862 | 2.37k | ptr -= MINBPC(enc); | 863 | 2.37k | break; | 864 | 2.37k | } | 865 | 6 | *nextTokPtr = ptr; | 866 | 6 | return XML_TOK_INVALID; | 867 | 10.4k | INVALID_CASES(ptr, nextTokPtr) | 868 | 301k | default: | 869 | 301k | ptr += MINBPC(enc); | 870 | 301k | break; | 871 | 1.22M | } | 872 | 5.91M | while (HAS_CHAR(enc, ptr, end)) { | 873 | 5.91M | switch (BYTE_TYPE(enc, ptr)) { | 874 | 0 | # define LEAD_CASE(n) \ | 875 | 0 | case BT_LEAD##n: \ | 876 | 0 | if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ | 877 | 0 | *nextTokPtr = ptr; \ | 878 | 0 | return XML_TOK_DATA_CHARS; \ | 879 | 0 | } \ | 880 | 0 | ptr += n; \ | 881 | 0 | break; | 882 | 0 | LEAD_CASE(2) | 883 | 0 | LEAD_CASE(3) | 884 | 509k | LEAD_CASE(4) | 885 | 0 | # undef LEAD_CASE | 886 | 221k | case BT_RSQB: | 887 | 221k | if (HAS_CHARS(enc, ptr, end, 2)) { | 888 | 221k | if (! CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) { | 889 | 115k | ptr += MINBPC(enc); | 890 | 115k | break; | 891 | 115k | } | 892 | 105k | if (HAS_CHARS(enc, ptr, end, 3)) { | 893 | 105k | if (! CHAR_MATCHES(enc, ptr + 2 * MINBPC(enc), ASCII_GT)) { | 894 | 105k | ptr += MINBPC(enc); | 895 | 105k | break; | 896 | 105k | } | 897 | 6 | *nextTokPtr = ptr + 2 * MINBPC(enc); | 898 | 6 | return XML_TOK_INVALID; | 899 | 105k | } | 900 | 105k | } | 901 | | /* fall through */ | 902 | 31.9k | case BT_AMP: | 903 | 230k | case BT_LT: | 904 | 230k | case BT_NONXML: | 905 | 230k | case BT_MALFORM: | 906 | 230k | case BT_TRAIL: | 907 | 258k | case BT_CR: | 908 | 308k | case BT_LF: | 909 | 308k | *nextTokPtr = ptr; | 910 | 308k | return XML_TOK_DATA_CHARS; | 911 | 4.87M | default: | 912 | 4.87M | ptr += MINBPC(enc); | 913 | 4.87M | break; | 914 | 5.91M | } | 915 | 5.91M | } | 916 | 3.08k | *nextTokPtr = ptr; | 917 | 3.08k | return XML_TOK_DATA_CHARS; | 918 | 312k | } |
Line | Count | Source | 824 | 1.36M | const char **nextTokPtr) { | 825 | 1.36M | if (ptr >= end) | 826 | 6.34k | return XML_TOK_NONE; | 827 | 1.35M | if (MINBPC(enc) > 1) { | 828 | 1.35M | size_t n = end - ptr; | 829 | 1.35M | if (n & (MINBPC(enc) - 1)) { | 830 | 45.7k | n &= ~(MINBPC(enc) - 1); | 831 | 45.7k | if (n == 0) | 832 | 995 | return XML_TOK_PARTIAL; | 833 | 44.7k | end = ptr + n; | 834 | 44.7k | } | 835 | 1.35M | } | 836 | 1.35M | switch (BYTE_TYPE(enc, ptr)) { | 837 | 436k | case BT_LT: | 838 | 436k | return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 839 | 148k | case BT_AMP: | 840 | 148k | return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 841 | 449k | case BT_CR: | 842 | 449k | ptr += MINBPC(enc); | 843 | 449k | if (! HAS_CHAR(enc, ptr, end)) | 844 | 306 | return XML_TOK_TRAILING_CR; | 845 | 449k | if (BYTE_TYPE(enc, ptr) == BT_LF) | 846 | 13.5k | ptr += MINBPC(enc); | 847 | 449k | *nextTokPtr = ptr; | 848 | 449k | return XML_TOK_DATA_NEWLINE; | 849 | 23.7k | case BT_LF: | 850 | 23.7k | *nextTokPtr = ptr + MINBPC(enc); | 851 | 23.7k | return XML_TOK_DATA_NEWLINE; | 852 | 7.09k | case BT_RSQB: | 853 | 7.09k | ptr += MINBPC(enc); | 854 | 7.09k | if (! HAS_CHAR(enc, ptr, end)) | 855 | 185 | return XML_TOK_TRAILING_RSQB; | 856 | 6.90k | if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB)) | 857 | 4.78k | break; | 858 | 2.12k | ptr += MINBPC(enc); | 859 | 2.12k | if (! HAS_CHAR(enc, ptr, end)) | 860 | 109 | return XML_TOK_TRAILING_RSQB; | 861 | 2.01k | if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) { | 862 | 2.01k | ptr -= MINBPC(enc); | 863 | 2.01k | break; | 864 | 2.01k | } | 865 | 6 | *nextTokPtr = ptr; | 866 | 6 | return XML_TOK_INVALID; | 867 | 21.8k | INVALID_CASES(ptr, nextTokPtr) | 868 | 280k | default: | 869 | 280k | ptr += MINBPC(enc); | 870 | 280k | break; | 871 | 1.35M | } | 872 | 9.13M | while (HAS_CHAR(enc, ptr, end)) { | 873 | 9.13M | switch (BYTE_TYPE(enc, ptr)) { | 874 | 0 | # define LEAD_CASE(n) \ | 875 | 0 | case BT_LEAD##n: \ | 876 | 0 | if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ | 877 | 0 | *nextTokPtr = ptr; \ | 878 | 0 | return XML_TOK_DATA_CHARS; \ | 879 | 0 | } \ | 880 | 0 | ptr += n; \ | 881 | 0 | break; | 882 | 0 | LEAD_CASE(2) | 883 | 0 | LEAD_CASE(3) | 884 | 856k | LEAD_CASE(4) | 885 | 0 | # undef LEAD_CASE | 886 | 154k | case BT_RSQB: | 887 | 154k | if (HAS_CHARS(enc, ptr, end, 2)) { | 888 | 154k | if (! CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) { | 889 | 77.0k | ptr += MINBPC(enc); | 890 | 77.0k | break; | 891 | 77.0k | } | 892 | 77.5k | if (HAS_CHARS(enc, ptr, end, 3)) { | 893 | 77.4k | if (! CHAR_MATCHES(enc, ptr + 2 * MINBPC(enc), ASCII_GT)) { | 894 | 77.4k | ptr += MINBPC(enc); | 895 | 77.4k | break; | 896 | 77.4k | } | 897 | 11 | *nextTokPtr = ptr + 2 * MINBPC(enc); | 898 | 11 | return XML_TOK_INVALID; | 899 | 77.4k | } | 900 | 77.5k | } | 901 | | /* fall through */ | 902 | 129k | case BT_AMP: | 903 | 196k | case BT_LT: | 904 | 196k | case BT_NONXML: | 905 | 196k | case BT_MALFORM: | 906 | 196k | case BT_TRAIL: | 907 | 278k | case BT_CR: | 908 | 292k | case BT_LF: | 909 | 292k | *nextTokPtr = ptr; | 910 | 292k | return XML_TOK_DATA_CHARS; | 911 | 7.83M | default: | 912 | 7.83M | ptr += MINBPC(enc); | 913 | 7.83M | break; | 914 | 9.13M | } | 915 | 9.13M | } | 916 | 4.44k | *nextTokPtr = ptr; | 917 | 4.44k | return XML_TOK_DATA_CHARS; | 918 | 297k | } |
|
919 | | |
920 | | /* ptr points to character following "%" */ |
921 | | |
922 | | static int PTRCALL |
923 | | PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, |
924 | 500k | const char **nextTokPtr) { |
925 | 500k | REQUIRE_CHAR(enc, ptr, end); |
926 | 500k | switch (BYTE_TYPE(enc, ptr)) { |
927 | 190k | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
928 | 9.27k | case BT_S: |
929 | 29.8k | case BT_LF: |
930 | 128k | case BT_CR: |
931 | 128k | case BT_PERCNT: |
932 | 128k | *nextTokPtr = ptr; |
933 | 128k | return XML_TOK_PERCENT; |
934 | 217 | default: |
935 | 217 | *nextTokPtr = ptr; |
936 | 217 | return XML_TOK_INVALID; |
937 | 500k | } |
938 | 10.9M | while (HAS_CHAR(enc, ptr, end)) { |
939 | 10.9M | switch (BYTE_TYPE(enc, ptr)) { |
940 | 45.6M | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
941 | 368k | case BT_SEMI: |
942 | 368k | *nextTokPtr = ptr + MINBPC(enc); |
943 | 368k | return XML_TOK_PARAM_ENTITY_REF; |
944 | 302 | default: |
945 | 302 | *nextTokPtr = ptr; |
946 | 302 | return XML_TOK_INVALID; |
947 | 10.9M | } |
948 | 10.9M | } |
949 | 1.67k | return XML_TOK_PARTIAL; |
950 | 371k | } xmltok.c:normal_scanPercent Line | Count | Source | 924 | 489k | const char **nextTokPtr) { | 925 | 489k | REQUIRE_CHAR(enc, ptr, end); | 926 | 489k | switch (BYTE_TYPE(enc, ptr)) { | 927 | 187k | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) | 928 | 2.23k | case BT_S: | 929 | 21.8k | case BT_LF: | 930 | 120k | case BT_CR: | 931 | 120k | case BT_PERCNT: | 932 | 120k | *nextTokPtr = ptr; | 933 | 120k | return XML_TOK_PERCENT; | 934 | 87 | default: | 935 | 87 | *nextTokPtr = ptr; | 936 | 87 | return XML_TOK_INVALID; | 937 | 489k | } | 938 | 4.47M | while (HAS_CHAR(enc, ptr, end)) { | 939 | 4.47M | switch (BYTE_TYPE(enc, ptr)) { | 940 | 13.5M | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) | 941 | 367k | case BT_SEMI: | 942 | 367k | *nextTokPtr = ptr + MINBPC(enc); | 943 | 367k | return XML_TOK_PARAM_ENTITY_REF; | 944 | 149 | default: | 945 | 149 | *nextTokPtr = ptr; | 946 | 149 | return XML_TOK_INVALID; | 947 | 4.47M | } | 948 | 4.47M | } | 949 | 957 | return XML_TOK_PARTIAL; | 950 | 368k | } |
xmltok.c:little2_scanPercent Line | Count | Source | 924 | 4.34k | const char **nextTokPtr) { | 925 | 4.34k | REQUIRE_CHAR(enc, ptr, end); | 926 | 4.32k | switch (BYTE_TYPE(enc, ptr)) { | 927 | 1.55k | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) | 928 | 2.61k | case BT_S: | 929 | 2.73k | case BT_LF: | 930 | 2.98k | case BT_CR: | 931 | 2.99k | case BT_PERCNT: | 932 | 2.99k | *nextTokPtr = ptr; | 933 | 2.99k | return XML_TOK_PERCENT; | 934 | 72 | default: | 935 | 72 | *nextTokPtr = ptr; | 936 | 72 | return XML_TOK_INVALID; | 937 | 4.32k | } | 938 | 3.83M | while (HAS_CHAR(enc, ptr, end)) { | 939 | 3.83M | switch (BYTE_TYPE(enc, ptr)) { | 940 | 19.1M | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) | 941 | 584 | case BT_SEMI: | 942 | 584 | *nextTokPtr = ptr + MINBPC(enc); | 943 | 584 | return XML_TOK_PARAM_ENTITY_REF; | 944 | 91 | default: | 945 | 91 | *nextTokPtr = ptr; | 946 | 91 | return XML_TOK_INVALID; | 947 | 3.83M | } | 948 | 3.83M | } | 949 | 354 | return XML_TOK_PARTIAL; | 950 | 1.15k | } |
xmltok.c:big2_scanPercent Line | Count | Source | 924 | 6.87k | const char **nextTokPtr) { | 925 | 6.87k | REQUIRE_CHAR(enc, ptr, end); | 926 | 6.85k | switch (BYTE_TYPE(enc, ptr)) { | 927 | 1.50k | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) | 928 | 4.42k | case BT_S: | 929 | 5.24k | case BT_LF: | 930 | 5.58k | case BT_CR: | 931 | 5.59k | case BT_PERCNT: | 932 | 5.59k | *nextTokPtr = ptr; | 933 | 5.59k | return XML_TOK_PERCENT; | 934 | 58 | default: | 935 | 58 | *nextTokPtr = ptr; | 936 | 58 | return XML_TOK_INVALID; | 937 | 6.85k | } | 938 | 2.59M | while (HAS_CHAR(enc, ptr, end)) { | 939 | 2.59M | switch (BYTE_TYPE(enc, ptr)) { | 940 | 12.9M | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) | 941 | 521 | case BT_SEMI: | 942 | 521 | *nextTokPtr = ptr + MINBPC(enc); | 943 | 521 | return XML_TOK_PARAM_ENTITY_REF; | 944 | 62 | default: | 945 | 62 | *nextTokPtr = ptr; | 946 | 62 | return XML_TOK_INVALID; | 947 | 2.59M | } | 948 | 2.59M | } | 949 | 368 | return XML_TOK_PARTIAL; | 950 | 1.08k | } |
|
951 | | |
952 | | static int PTRCALL |
953 | | PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, |
954 | 119k | const char **nextTokPtr) { |
955 | 119k | REQUIRE_CHAR(enc, ptr, end); |
956 | 119k | switch (BYTE_TYPE(enc, ptr)) { |
957 | 116k | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
958 | 213 | default: |
959 | 213 | *nextTokPtr = ptr; |
960 | 213 | return XML_TOK_INVALID; |
961 | 119k | } |
962 | 18.9M | while (HAS_CHAR(enc, ptr, end)) { |
963 | 18.9M | switch (BYTE_TYPE(enc, ptr)) { |
964 | 69.3M | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
965 | 104k | case BT_CR: |
966 | 105k | case BT_LF: |
967 | 108k | case BT_S: |
968 | 111k | case BT_RPAR: |
969 | 113k | case BT_GT: |
970 | 113k | case BT_PERCNT: |
971 | 116k | case BT_VERBAR: |
972 | 116k | *nextTokPtr = ptr; |
973 | 116k | return XML_TOK_POUND_NAME; |
974 | 237 | default: |
975 | 237 | *nextTokPtr = ptr; |
976 | 237 | return XML_TOK_INVALID; |
977 | 18.9M | } |
978 | 18.9M | } |
979 | 1.34k | return -XML_TOK_POUND_NAME; |
980 | 118k | } xmltok.c:normal_scanPoundName Line | Count | Source | 954 | 104k | const char **nextTokPtr) { | 955 | 104k | REQUIRE_CHAR(enc, ptr, end); | 956 | 104k | switch (BYTE_TYPE(enc, ptr)) { | 957 | 103k | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) | 958 | 81 | default: | 959 | 81 | *nextTokPtr = ptr; | 960 | 81 | return XML_TOK_INVALID; | 961 | 104k | } | 962 | 14.2M | while (HAS_CHAR(enc, ptr, end)) { | 963 | 14.2M | switch (BYTE_TYPE(enc, ptr)) { | 964 | 45.9M | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) | 965 | 98.5k | case BT_CR: | 966 | 99.5k | case BT_LF: | 967 | 101k | case BT_S: | 968 | 101k | case BT_RPAR: | 969 | 102k | case BT_GT: | 970 | 102k | case BT_PERCNT: | 971 | 102k | case BT_VERBAR: | 972 | 102k | *nextTokPtr = ptr; | 973 | 102k | return XML_TOK_POUND_NAME; | 974 | 103 | default: | 975 | 103 | *nextTokPtr = ptr; | 976 | 103 | return XML_TOK_INVALID; | 977 | 14.2M | } | 978 | 14.2M | } | 979 | 743 | return -XML_TOK_POUND_NAME; | 980 | 103k | } |
xmltok.c:little2_scanPoundName Line | Count | Source | 954 | 10.6k | const char **nextTokPtr) { | 955 | 10.6k | REQUIRE_CHAR(enc, ptr, end); | 956 | 10.6k | switch (BYTE_TYPE(enc, ptr)) { | 957 | 9.74k | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) | 958 | 68 | default: | 959 | 68 | *nextTokPtr = ptr; | 960 | 68 | return XML_TOK_INVALID; | 961 | 10.6k | } | 962 | 4.29M | while (HAS_CHAR(enc, ptr, end)) { | 963 | 4.29M | switch (BYTE_TYPE(enc, ptr)) { | 964 | 21.3M | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) | 965 | 3.81k | case BT_CR: | 966 | 3.91k | case BT_LF: | 967 | 4.13k | case BT_S: | 968 | 7.02k | case BT_RPAR: | 969 | 7.08k | case BT_GT: | 970 | 7.08k | case BT_PERCNT: | 971 | 9.91k | case BT_VERBAR: | 972 | 9.91k | *nextTokPtr = ptr; | 973 | 9.91k | return XML_TOK_POUND_NAME; | 974 | 65 | default: | 975 | 65 | *nextTokPtr = ptr; | 976 | 65 | return XML_TOK_INVALID; | 977 | 4.29M | } | 978 | 4.29M | } | 979 | 345 | return -XML_TOK_POUND_NAME; | 980 | 10.4k | } |
xmltok.c:big2_scanPoundName Line | Count | Source | 954 | 4.70k | const char **nextTokPtr) { | 955 | 4.70k | REQUIRE_CHAR(enc, ptr, end); | 956 | 4.66k | switch (BYTE_TYPE(enc, ptr)) { | 957 | 3.47k | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) | 958 | 64 | default: | 959 | 64 | *nextTokPtr = ptr; | 960 | 64 | return XML_TOK_INVALID; | 961 | 4.66k | } | 962 | 422k | while (HAS_CHAR(enc, ptr, end)) { | 963 | 422k | switch (BYTE_TYPE(enc, ptr)) { | 964 | 2.05M | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) | 965 | 1.85k | case BT_CR: | 966 | 2.12k | case BT_LF: | 967 | 3.27k | case BT_S: | 968 | 3.34k | case BT_RPAR: | 969 | 3.45k | case BT_GT: | 970 | 3.45k | case BT_PERCNT: | 971 | 4.07k | case BT_VERBAR: | 972 | 4.07k | *nextTokPtr = ptr; | 973 | 4.07k | return XML_TOK_POUND_NAME; | 974 | 69 | default: | 975 | 69 | *nextTokPtr = ptr; | 976 | 69 | return XML_TOK_INVALID; | 977 | 422k | } | 978 | 422k | } | 979 | 261 | return -XML_TOK_POUND_NAME; | 980 | 4.51k | } |
|
981 | | |
982 | | static int PTRCALL |
983 | | PREFIX(scanLit)(int open, const ENCODING *enc, const char *ptr, const char *end, |
984 | 405k | const char **nextTokPtr) { |
985 | 1.35G | while (HAS_CHAR(enc, ptr, end)) { |
986 | 1.35G | int t = BYTE_TYPE(enc, ptr); |
987 | 1.35G | switch (t) { |
988 | 19.0M | INVALID_CASES(ptr, nextTokPtr) |
989 | 1.57M | case BT_QUOT: |
990 | 2.18M | case BT_APOS: |
991 | 2.18M | ptr += MINBPC(enc); |
992 | 2.18M | if (t != open) |
993 | 1.80M | break; |
994 | 380k | if (! HAS_CHAR(enc, ptr, end)) |
995 | 2.76k | return -XML_TOK_LITERAL; |
996 | 377k | *nextTokPtr = ptr; |
997 | 377k | switch (BYTE_TYPE(enc, ptr)) { |
998 | 28.1k | case BT_S: |
999 | 36.5k | case BT_CR: |
1000 | 79.0k | case BT_LF: |
1001 | 376k | case BT_GT: |
1002 | 376k | case BT_PERCNT: |
1003 | 377k | case BT_LSQB: |
1004 | 377k | return XML_TOK_LITERAL; |
1005 | 186 | default: |
1006 | 186 | return XML_TOK_INVALID; |
1007 | 377k | } |
1008 | 1.34G | default: |
1009 | 1.34G | ptr += MINBPC(enc); |
1010 | 1.34G | break; |
1011 | 1.35G | } |
1012 | 1.35G | } |
1013 | 23.5k | return XML_TOK_PARTIAL; |
1014 | 405k | } Line | Count | Source | 984 | 277k | const char **nextTokPtr) { | 985 | 1.10G | while (HAS_CHAR(enc, ptr, end)) { | 986 | 1.10G | int t = BYTE_TYPE(enc, ptr); | 987 | 1.10G | switch (t) { | 988 | 2.17M | INVALID_CASES(ptr, nextTokPtr) | 989 | 789k | case BT_QUOT: | 990 | 1.27M | case BT_APOS: | 991 | 1.27M | ptr += MINBPC(enc); | 992 | 1.27M | if (t != open) | 993 | 1.01M | break; | 994 | 261k | if (! HAS_CHAR(enc, ptr, end)) | 995 | 1.32k | return -XML_TOK_LITERAL; | 996 | 260k | *nextTokPtr = ptr; | 997 | 260k | switch (BYTE_TYPE(enc, ptr)) { | 998 | 24.1k | case BT_S: | 999 | 25.3k | case BT_CR: | 1000 | 64.5k | case BT_LF: | 1001 | 259k | case BT_GT: | 1002 | 259k | case BT_PERCNT: | 1003 | 260k | case BT_LSQB: | 1004 | 260k | return XML_TOK_LITERAL; | 1005 | 105 | default: | 1006 | 105 | return XML_TOK_INVALID; | 1007 | 260k | } | 1008 | 1.10G | default: | 1009 | 1.10G | ptr += MINBPC(enc); | 1010 | 1.10G | break; | 1011 | 1.10G | } | 1012 | 1.10G | } | 1013 | 14.6k | return XML_TOK_PARTIAL; | 1014 | 277k | } |
Line | Count | Source | 984 | 23.7k | const char **nextTokPtr) { | 985 | 119M | while (HAS_CHAR(enc, ptr, end)) { | 986 | 119M | int t = BYTE_TYPE(enc, ptr); | 987 | 119M | switch (t) { | 988 | 6.54M | INVALID_CASES(ptr, nextTokPtr) | 989 | 679k | case BT_QUOT: | 990 | 699k | case BT_APOS: | 991 | 699k | ptr += MINBPC(enc); | 992 | 699k | if (t != open) | 993 | 680k | break; | 994 | 18.9k | if (! HAS_CHAR(enc, ptr, end)) | 995 | 698 | return -XML_TOK_LITERAL; | 996 | 18.2k | *nextTokPtr = ptr; | 997 | 18.2k | switch (BYTE_TYPE(enc, ptr)) { | 998 | 1.11k | case BT_S: | 999 | 4.17k | case BT_CR: | 1000 | 5.25k | case BT_LF: | 1001 | 18.0k | case BT_GT: | 1002 | 18.1k | case BT_PERCNT: | 1003 | 18.2k | case BT_LSQB: | 1004 | 18.2k | return XML_TOK_LITERAL; | 1005 | 45 | default: | 1006 | 45 | return XML_TOK_INVALID; | 1007 | 18.2k | } | 1008 | 116M | default: | 1009 | 116M | ptr += MINBPC(enc); | 1010 | 116M | break; | 1011 | 119M | } | 1012 | 119M | } | 1013 | 4.43k | return XML_TOK_PARTIAL; | 1014 | 23.7k | } |
Line | Count | Source | 984 | 104k | const char **nextTokPtr) { | 985 | 132M | while (HAS_CHAR(enc, ptr, end)) { | 986 | 132M | int t = BYTE_TYPE(enc, ptr); | 987 | 132M | switch (t) { | 988 | 10.2M | INVALID_CASES(ptr, nextTokPtr) | 989 | 105k | case BT_QUOT: | 990 | 206k | case BT_APOS: | 991 | 206k | ptr += MINBPC(enc); | 992 | 206k | if (t != open) | 993 | 107k | break; | 994 | 99.6k | if (! HAS_CHAR(enc, ptr, end)) | 995 | 736 | return -XML_TOK_LITERAL; | 996 | 98.9k | *nextTokPtr = ptr; | 997 | 98.9k | switch (BYTE_TYPE(enc, ptr)) { | 998 | 2.83k | case BT_S: | 999 | 7.03k | case BT_CR: | 1000 | 9.27k | case BT_LF: | 1001 | 98.4k | case BT_GT: | 1002 | 98.4k | case BT_PERCNT: | 1003 | 98.8k | case BT_LSQB: | 1004 | 98.8k | return XML_TOK_LITERAL; | 1005 | 36 | default: | 1006 | 36 | return XML_TOK_INVALID; | 1007 | 98.9k | } | 1008 | 127M | default: | 1009 | 127M | ptr += MINBPC(enc); | 1010 | 127M | break; | 1011 | 132M | } | 1012 | 132M | } | 1013 | 4.46k | return XML_TOK_PARTIAL; | 1014 | 104k | } |
|
1015 | | |
1016 | | static int PTRCALL |
1017 | | PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, |
1018 | 21.4M | const char **nextTokPtr) { |
1019 | 21.4M | int tok; |
1020 | 21.4M | if (ptr >= end) |
1021 | 11.7k | return XML_TOK_NONE; |
1022 | 21.3M | if (MINBPC(enc) > 1) { |
1023 | 3.02M | size_t n = end - ptr; |
1024 | 3.02M | if (n & (MINBPC(enc) - 1)) { |
1025 | 128k | n &= ~(MINBPC(enc) - 1); |
1026 | 128k | if (n == 0) |
1027 | 284 | return XML_TOK_PARTIAL; |
1028 | 128k | end = ptr + n; |
1029 | 128k | } |
1030 | 3.02M | } |
1031 | 21.3M | switch (BYTE_TYPE(enc, ptr)) { |
1032 | 229k | case BT_QUOT: |
1033 | 229k | return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr); |
1034 | 175k | case BT_APOS: |
1035 | 175k | return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr); |
1036 | 944k | case BT_LT: { |
1037 | 944k | ptr += MINBPC(enc); |
1038 | 944k | REQUIRE_CHAR(enc, ptr, end); |
1039 | 943k | switch (BYTE_TYPE(enc, ptr)) { |
1040 | 823k | case BT_EXCL: |
1041 | 823k | return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
1042 | 72.6k | case BT_QUEST: |
1043 | 72.6k | return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
1044 | 21.3k | case BT_NMSTRT: |
1045 | 35.2k | case BT_HEX: |
1046 | 45.1k | case BT_NONASCII: |
1047 | 45.2k | case BT_LEAD2: |
1048 | 45.4k | case BT_LEAD3: |
1049 | 45.5k | case BT_LEAD4: |
1050 | 45.5k | *nextTokPtr = ptr - MINBPC(enc); |
1051 | 45.5k | return XML_TOK_INSTANCE_START; |
1052 | 943k | } |
1053 | 2.01k | *nextTokPtr = ptr; |
1054 | 2.01k | return XML_TOK_INVALID; |
1055 | 943k | } |
1056 | 698k | case BT_CR: |
1057 | 698k | if (ptr + MINBPC(enc) == end) { |
1058 | 4.83k | *nextTokPtr = end; |
1059 | | /* indicate that this might be part of a CR/LF pair */ |
1060 | 4.83k | return -XML_TOK_PROLOG_S; |
1061 | 4.83k | } |
1062 | | /* fall through */ |
1063 | 1.38M | case BT_S: |
1064 | 2.02M | case BT_LF: |
1065 | 20.2M | for (;;) { |
1066 | 20.2M | ptr += MINBPC(enc); |
1067 | 20.2M | if (! HAS_CHAR(enc, ptr, end)) |
1068 | 1.65k | break; |
1069 | 20.2M | switch (BYTE_TYPE(enc, ptr)) { |
1070 | 1.24M | case BT_S: |
1071 | 1.57M | case BT_LF: |
1072 | 1.57M | break; |
1073 | 16.6M | case BT_CR: |
1074 | | /* don't split CR/LF pair */ |
1075 | 16.6M | if (ptr + MINBPC(enc) != end) |
1076 | 16.6M | break; |
1077 | | /* fall through */ |
1078 | 2.01M | default: |
1079 | 2.01M | *nextTokPtr = ptr; |
1080 | 2.01M | return XML_TOK_PROLOG_S; |
1081 | 20.2M | } |
1082 | 20.2M | } |
1083 | 1.65k | *nextTokPtr = ptr; |
1084 | 1.65k | return XML_TOK_PROLOG_S; |
1085 | 499k | case BT_PERCNT: |
1086 | 499k | return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
1087 | 133k | case BT_COMMA: |
1088 | 133k | *nextTokPtr = ptr + MINBPC(enc); |
1089 | 133k | return XML_TOK_COMMA; |
1090 | 21.6k | case BT_LSQB: |
1091 | 21.6k | *nextTokPtr = ptr + MINBPC(enc); |
1092 | 21.6k | return XML_TOK_OPEN_BRACKET; |
1093 | 13.6k | case BT_RSQB: |
1094 | 13.6k | ptr += MINBPC(enc); |
1095 | 13.6k | if (! HAS_CHAR(enc, ptr, end)) |
1096 | 53 | return -XML_TOK_CLOSE_BRACKET; |
1097 | 13.6k | if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { |
1098 | 239 | REQUIRE_CHARS(enc, ptr, end, 2); |
1099 | 214 | if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) { |
1100 | 25 | *nextTokPtr = ptr + 2 * MINBPC(enc); |
1101 | 25 | return XML_TOK_COND_SECT_CLOSE; |
1102 | 25 | } |
1103 | 214 | } |
1104 | 13.5k | *nextTokPtr = ptr; |
1105 | 13.5k | return XML_TOK_CLOSE_BRACKET; |
1106 | 8.16M | case BT_LPAR: |
1107 | 8.16M | *nextTokPtr = ptr + MINBPC(enc); |
1108 | 8.16M | return XML_TOK_OPEN_PAREN; |
1109 | 399k | case BT_RPAR: |
1110 | 399k | ptr += MINBPC(enc); |
1111 | 399k | if (! HAS_CHAR(enc, ptr, end)) |
1112 | 248 | return -XML_TOK_CLOSE_PAREN; |
1113 | 399k | switch (BYTE_TYPE(enc, ptr)) { |
1114 | 15.8k | case BT_AST: |
1115 | 15.8k | *nextTokPtr = ptr + MINBPC(enc); |
1116 | 15.8k | return XML_TOK_CLOSE_PAREN_ASTERISK; |
1117 | 17.9k | case BT_QUEST: |
1118 | 17.9k | *nextTokPtr = ptr + MINBPC(enc); |
1119 | 17.9k | return XML_TOK_CLOSE_PAREN_QUESTION; |
1120 | 84.9k | case BT_PLUS: |
1121 | 84.9k | *nextTokPtr = ptr + MINBPC(enc); |
1122 | 84.9k | return XML_TOK_CLOSE_PAREN_PLUS; |
1123 | 675 | case BT_CR: |
1124 | 3.57k | case BT_LF: |
1125 | 180k | case BT_S: |
1126 | 182k | case BT_GT: |
1127 | 210k | case BT_COMMA: |
1128 | 218k | case BT_VERBAR: |
1129 | 280k | case BT_RPAR: |
1130 | 280k | *nextTokPtr = ptr; |
1131 | 280k | return XML_TOK_CLOSE_PAREN; |
1132 | 399k | } |
1133 | 198 | *nextTokPtr = ptr; |
1134 | 198 | return XML_TOK_INVALID; |
1135 | 3.63M | case BT_VERBAR: |
1136 | 3.63M | *nextTokPtr = ptr + MINBPC(enc); |
1137 | 3.63M | return XML_TOK_OR; |
1138 | 345k | case BT_GT: |
1139 | 345k | *nextTokPtr = ptr + MINBPC(enc); |
1140 | 345k | return XML_TOK_DECL_CLOSE; |
1141 | 119k | case BT_NUM: |
1142 | 119k | return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
1143 | 0 | # define LEAD_CASE(n) \ |
1144 | 463k | case BT_LEAD##n: \ |
1145 | 463k | if (end - ptr < n) \ |
1146 | 729 | return XML_TOK_PARTIAL_CHAR; \ |
1147 | 463k | if (IS_INVALID_CHAR(enc, ptr, n)) { \ |
1148 | 354 | *nextTokPtr = ptr; \ |
1149 | 354 | return XML_TOK_INVALID; \ |
1150 | 354 | } \ |
1151 | 463k | if (IS_NMSTRT_CHAR(enc, ptr, n)) { \ |
1152 | 174k | ptr += n; \ |
1153 | 174k | tok = XML_TOK_NAME; \ |
1154 | 174k | break; \ |
1155 | 174k | } \ |
1156 | 462k | if (IS_NAME_CHAR(enc, ptr, n)) { \ |
1157 | 288k | ptr += n; \ |
1158 | 288k | tok = XML_TOK_NMTOKEN; \ |
1159 | 288k | break; \ |
1160 | 288k | } \ |
1161 | 288k | *nextTokPtr = ptr; \ |
1162 | 190 | return XML_TOK_INVALID; |
1163 | 1.15M | LEAD_CASE(2) |
1164 | 518k | LEAD_CASE(3) |
1165 | 882 | LEAD_CASE(4) |
1166 | 0 | # undef LEAD_CASE |
1167 | 2.54M | case BT_NMSTRT: |
1168 | 2.96M | case BT_HEX: |
1169 | 2.96M | tok = XML_TOK_NAME; |
1170 | 2.96M | ptr += MINBPC(enc); |
1171 | 2.96M | break; |
1172 | 15.7k | case BT_DIGIT: |
1173 | 131k | case BT_NAME: |
1174 | 537k | case BT_MINUS: |
1175 | 537k | # ifdef XML_NS |
1176 | 910k | case BT_COLON: |
1177 | 910k | # endif |
1178 | 910k | tok = XML_TOK_NMTOKEN; |
1179 | 910k | ptr += MINBPC(enc); |
1180 | 910k | break; |
1181 | 337k | case BT_NONASCII: |
1182 | 337k | if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { |
1183 | 336k | ptr += MINBPC(enc); |
1184 | 336k | tok = XML_TOK_NAME; |
1185 | 336k | break; |
1186 | 336k | } |
1187 | 1.04k | if (IS_NAME_CHAR_MINBPC(enc, ptr)) { |
1188 | 676 | ptr += MINBPC(enc); |
1189 | 676 | tok = XML_TOK_NMTOKEN; |
1190 | 676 | break; |
1191 | 676 | } |
1192 | | /* fall through */ |
1193 | 1.82k | default: |
1194 | 1.82k | *nextTokPtr = ptr; |
1195 | 1.82k | return XML_TOK_INVALID; |
1196 | 21.3M | } |
1197 | 158M | while (HAS_CHAR(enc, ptr, end)) { |
1198 | 158M | switch (BYTE_TYPE(enc, ptr)) { |
1199 | 476M | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
1200 | 8.73k | case BT_GT: |
1201 | 145k | case BT_RPAR: |
1202 | 149k | case BT_COMMA: |
1203 | 3.69M | case BT_VERBAR: |
1204 | 3.71M | case BT_LSQB: |
1205 | 3.71M | case BT_PERCNT: |
1206 | 3.97M | case BT_S: |
1207 | 4.25M | case BT_CR: |
1208 | 4.53M | case BT_LF: |
1209 | 4.53M | *nextTokPtr = ptr; |
1210 | 4.53M | return tok; |
1211 | 0 | # ifdef XML_NS |
1212 | 1.12M | case BT_COLON: |
1213 | 1.12M | ptr += MINBPC(enc); |
1214 | 1.12M | switch (tok) { |
1215 | 924k | case XML_TOK_NAME: |
1216 | 924k | REQUIRE_CHAR(enc, ptr, end); |
1217 | 923k | tok = XML_TOK_PREFIXED_NAME; |
1218 | 923k | switch (BYTE_TYPE(enc, ptr)) { |
1219 | 1.92M | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
1220 | 57.7k | default: |
1221 | 57.7k | tok = XML_TOK_NMTOKEN; |
1222 | 57.7k | break; |
1223 | 923k | } |
1224 | 923k | break; |
1225 | 923k | case XML_TOK_PREFIXED_NAME: |
1226 | 69.6k | tok = XML_TOK_NMTOKEN; |
1227 | 69.6k | break; |
1228 | 1.12M | } |
1229 | 1.12M | break; |
1230 | 1.12M | # endif |
1231 | 1.12M | case BT_PLUS: |
1232 | 97.8k | if (tok == XML_TOK_NMTOKEN) { |
1233 | 23 | *nextTokPtr = ptr; |
1234 | 23 | return XML_TOK_INVALID; |
1235 | 23 | } |
1236 | 97.7k | *nextTokPtr = ptr + MINBPC(enc); |
1237 | 97.7k | return XML_TOK_NAME_PLUS; |
1238 | 1.73k | case BT_AST: |
1239 | 1.73k | if (tok == XML_TOK_NMTOKEN) { |
1240 | 26 | *nextTokPtr = ptr; |
1241 | 26 | return XML_TOK_INVALID; |
1242 | 26 | } |
1243 | 1.71k | *nextTokPtr = ptr + MINBPC(enc); |
1244 | 1.71k | return XML_TOK_NAME_ASTERISK; |
1245 | 27.5k | case BT_QUEST: |
1246 | 27.5k | if (tok == XML_TOK_NMTOKEN) { |
1247 | 38 | *nextTokPtr = ptr; |
1248 | 38 | return XML_TOK_INVALID; |
1249 | 38 | } |
1250 | 27.5k | *nextTokPtr = ptr + MINBPC(enc); |
1251 | 27.5k | return XML_TOK_NAME_QUESTION; |
1252 | 901 | default: |
1253 | 901 | *nextTokPtr = ptr; |
1254 | 901 | return XML_TOK_INVALID; |
1255 | 158M | } |
1256 | 158M | } |
1257 | 14.4k | return -tok; |
1258 | 4.67M | } xmltok.c:normal_prologTok Line | Count | Source | 1018 | 18.3M | const char **nextTokPtr) { | 1019 | 18.3M | int tok; | 1020 | 18.3M | if (ptr >= end) | 1021 | 9.42k | return XML_TOK_NONE; | 1022 | 18.3M | if (MINBPC(enc) > 1) { | 1023 | 0 | size_t n = end - ptr; | 1024 | 0 | if (n & (MINBPC(enc) - 1)) { | 1025 | 0 | n &= ~(MINBPC(enc) - 1); | 1026 | 0 | if (n == 0) | 1027 | 0 | return XML_TOK_PARTIAL; | 1028 | 0 | end = ptr + n; | 1029 | 0 | } | 1030 | 0 | } | 1031 | 18.3M | switch (BYTE_TYPE(enc, ptr)) { | 1032 | 226k | case BT_QUOT: | 1033 | 226k | return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr); | 1034 | 50.9k | case BT_APOS: | 1035 | 50.9k | return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr); | 1036 | 731k | case BT_LT: { | 1037 | 731k | ptr += MINBPC(enc); | 1038 | 731k | REQUIRE_CHAR(enc, ptr, end); | 1039 | 730k | switch (BYTE_TYPE(enc, ptr)) { | 1040 | 685k | case BT_EXCL: | 1041 | 685k | return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 1042 | 19.4k | case BT_QUEST: | 1043 | 19.4k | return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 1044 | 14.0k | case BT_NMSTRT: | 1045 | 24.8k | case BT_HEX: | 1046 | 24.8k | case BT_NONASCII: | 1047 | 24.8k | case BT_LEAD2: | 1048 | 25.0k | case BT_LEAD3: | 1049 | 25.0k | case BT_LEAD4: | 1050 | 25.0k | *nextTokPtr = ptr - MINBPC(enc); | 1051 | 25.0k | return XML_TOK_INSTANCE_START; | 1052 | 730k | } | 1053 | 607 | *nextTokPtr = ptr; | 1054 | 607 | return XML_TOK_INVALID; | 1055 | 730k | } | 1056 | 658k | case BT_CR: | 1057 | 658k | if (ptr + MINBPC(enc) == end) { | 1058 | 4.52k | *nextTokPtr = end; | 1059 | | /* indicate that this might be part of a CR/LF pair */ | 1060 | 4.52k | return -XML_TOK_PROLOG_S; | 1061 | 4.52k | } | 1062 | | /* fall through */ | 1063 | 1.11M | case BT_S: | 1064 | 1.49M | case BT_LF: | 1065 | 19.1M | for (;;) { | 1066 | 19.1M | ptr += MINBPC(enc); | 1067 | 19.1M | if (! HAS_CHAR(enc, ptr, end)) | 1068 | 1.02k | break; | 1069 | 19.1M | switch (BYTE_TYPE(enc, ptr)) { | 1070 | 1.08M | case BT_S: | 1071 | 1.27M | case BT_LF: | 1072 | 1.27M | break; | 1073 | 16.4M | case BT_CR: | 1074 | | /* don't split CR/LF pair */ | 1075 | 16.4M | if (ptr + MINBPC(enc) != end) | 1076 | 16.4M | break; | 1077 | | /* fall through */ | 1078 | 1.49M | default: | 1079 | 1.49M | *nextTokPtr = ptr; | 1080 | 1.49M | return XML_TOK_PROLOG_S; | 1081 | 19.1M | } | 1082 | 19.1M | } | 1083 | 1.02k | *nextTokPtr = ptr; | 1084 | 1.02k | return XML_TOK_PROLOG_S; | 1085 | 488k | case BT_PERCNT: | 1086 | 488k | return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 1087 | 1.69k | case BT_COMMA: | 1088 | 1.69k | *nextTokPtr = ptr + MINBPC(enc); | 1089 | 1.69k | return XML_TOK_COMMA; | 1090 | 16.2k | case BT_LSQB: | 1091 | 16.2k | *nextTokPtr = ptr + MINBPC(enc); | 1092 | 16.2k | return XML_TOK_OPEN_BRACKET; | 1093 | 10.7k | case BT_RSQB: | 1094 | 10.7k | ptr += MINBPC(enc); | 1095 | 10.7k | if (! HAS_CHAR(enc, ptr, end)) | 1096 | 23 | return -XML_TOK_CLOSE_BRACKET; | 1097 | 10.6k | if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { | 1098 | 76 | REQUIRE_CHARS(enc, ptr, end, 2); | 1099 | 65 | if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) { | 1100 | 10 | *nextTokPtr = ptr + 2 * MINBPC(enc); | 1101 | 10 | return XML_TOK_COND_SECT_CLOSE; | 1102 | 10 | } | 1103 | 65 | } | 1104 | 10.6k | *nextTokPtr = ptr; | 1105 | 10.6k | return XML_TOK_CLOSE_BRACKET; | 1106 | 7.92M | case BT_LPAR: | 1107 | 7.92M | *nextTokPtr = ptr + MINBPC(enc); | 1108 | 7.92M | return XML_TOK_OPEN_PAREN; | 1109 | 214k | case BT_RPAR: | 1110 | 214k | ptr += MINBPC(enc); | 1111 | 214k | if (! HAS_CHAR(enc, ptr, end)) | 1112 | 129 | return -XML_TOK_CLOSE_PAREN; | 1113 | 214k | switch (BYTE_TYPE(enc, ptr)) { | 1114 | 5.32k | case BT_AST: | 1115 | 5.32k | *nextTokPtr = ptr + MINBPC(enc); | 1116 | 5.32k | return XML_TOK_CLOSE_PAREN_ASTERISK; | 1117 | 16.0k | case BT_QUEST: | 1118 | 16.0k | *nextTokPtr = ptr + MINBPC(enc); | 1119 | 16.0k | return XML_TOK_CLOSE_PAREN_QUESTION; | 1120 | 31.5k | case BT_PLUS: | 1121 | 31.5k | *nextTokPtr = ptr + MINBPC(enc); | 1122 | 31.5k | return XML_TOK_CLOSE_PAREN_PLUS; | 1123 | 366 | case BT_CR: | 1124 | 883 | case BT_LF: | 1125 | 98.5k | case BT_S: | 1126 | 98.9k | case BT_GT: | 1127 | 99.3k | case BT_COMMA: | 1128 | 104k | case BT_VERBAR: | 1129 | 161k | case BT_RPAR: | 1130 | 161k | *nextTokPtr = ptr; | 1131 | 161k | return XML_TOK_CLOSE_PAREN; | 1132 | 214k | } | 1133 | 75 | *nextTokPtr = ptr; | 1134 | 75 | return XML_TOK_INVALID; | 1135 | 3.11M | case BT_VERBAR: | 1136 | 3.11M | *nextTokPtr = ptr + MINBPC(enc); | 1137 | 3.11M | return XML_TOK_OR; | 1138 | 218k | case BT_GT: | 1139 | 218k | *nextTokPtr = ptr + MINBPC(enc); | 1140 | 218k | return XML_TOK_DECL_CLOSE; | 1141 | 104k | case BT_NUM: | 1142 | 104k | return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 1143 | 0 | # define LEAD_CASE(n) \ | 1144 | 0 | case BT_LEAD##n: \ | 1145 | 0 | if (end - ptr < n) \ | 1146 | 0 | return XML_TOK_PARTIAL_CHAR; \ | 1147 | 0 | if (IS_INVALID_CHAR(enc, ptr, n)) { \ | 1148 | 0 | *nextTokPtr = ptr; \ | 1149 | 0 | return XML_TOK_INVALID; \ | 1150 | 0 | } \ | 1151 | 0 | if (IS_NMSTRT_CHAR(enc, ptr, n)) { \ | 1152 | 0 | ptr += n; \ | 1153 | 0 | tok = XML_TOK_NAME; \ | 1154 | 0 | break; \ | 1155 | 0 | } \ | 1156 | 0 | if (IS_NAME_CHAR(enc, ptr, n)) { \ | 1157 | 0 | ptr += n; \ | 1158 | 0 | tok = XML_TOK_NMTOKEN; \ | 1159 | 0 | break; \ | 1160 | 0 | } \ | 1161 | 0 | *nextTokPtr = ptr; \ | 1162 | 0 | return XML_TOK_INVALID; | 1163 | 1.15M | LEAD_CASE(2) | 1164 | 518k | LEAD_CASE(3) | 1165 | 491 | LEAD_CASE(4) | 1166 | 0 | # undef LEAD_CASE | 1167 | 2.30M | case BT_NMSTRT: | 1168 | 2.58M | case BT_HEX: | 1169 | 2.58M | tok = XML_TOK_NAME; | 1170 | 2.58M | ptr += MINBPC(enc); | 1171 | 2.58M | break; | 1172 | 2.21k | case BT_DIGIT: | 1173 | 3.93k | case BT_NAME: | 1174 | 409k | case BT_MINUS: | 1175 | 409k | # ifdef XML_NS | 1176 | 718k | case BT_COLON: | 1177 | 718k | # endif | 1178 | 718k | tok = XML_TOK_NMTOKEN; | 1179 | 718k | ptr += MINBPC(enc); | 1180 | 718k | break; | 1181 | 0 | case BT_NONASCII: | 1182 | 0 | if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { | 1183 | 0 | ptr += MINBPC(enc); | 1184 | 0 | tok = XML_TOK_NAME; | 1185 | 0 | break; | 1186 | 0 | } | 1187 | 0 | if (IS_NAME_CHAR_MINBPC(enc, ptr)) { | 1188 | 0 | ptr += MINBPC(enc); | 1189 | 0 | tok = XML_TOK_NMTOKEN; | 1190 | 0 | break; | 1191 | 0 | } | 1192 | | /* fall through */ | 1193 | 1.23k | default: | 1194 | 1.23k | *nextTokPtr = ptr; | 1195 | 1.23k | return XML_TOK_INVALID; | 1196 | 18.3M | } | 1197 | 152M | while (HAS_CHAR(enc, ptr, end)) { | 1198 | 152M | switch (BYTE_TYPE(enc, ptr)) { | 1199 | 453M | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) | 1200 | 7.86k | case BT_GT: | 1201 | 118k | case BT_RPAR: | 1202 | 118k | case BT_COMMA: | 1203 | 3.14M | case BT_VERBAR: | 1204 | 3.16M | case BT_LSQB: | 1205 | 3.16M | case BT_PERCNT: | 1206 | 3.36M | case BT_S: | 1207 | 3.63M | case BT_CR: | 1208 | 3.72M | case BT_LF: | 1209 | 3.72M | *nextTokPtr = ptr; | 1210 | 3.72M | return tok; | 1211 | 0 | # ifdef XML_NS | 1212 | 686k | case BT_COLON: | 1213 | 686k | ptr += MINBPC(enc); | 1214 | 686k | switch (tok) { | 1215 | 682k | case XML_TOK_NAME: | 1216 | 682k | REQUIRE_CHAR(enc, ptr, end); | 1217 | 681k | tok = XML_TOK_PREFIXED_NAME; | 1218 | 681k | switch (BYTE_TYPE(enc, ptr)) { | 1219 | 1.38M | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) | 1220 | 619 | default: | 1221 | 619 | tok = XML_TOK_NMTOKEN; | 1222 | 619 | break; | 1223 | 681k | } | 1224 | 681k | break; | 1225 | 681k | case XML_TOK_PREFIXED_NAME: | 1226 | 1.89k | tok = XML_TOK_NMTOKEN; | 1227 | 1.89k | break; | 1228 | 686k | } | 1229 | 686k | break; | 1230 | 686k | # endif | 1231 | 686k | case BT_PLUS: | 1232 | 378 | if (tok == XML_TOK_NMTOKEN) { | 1233 | 9 | *nextTokPtr = ptr; | 1234 | 9 | return XML_TOK_INVALID; | 1235 | 9 | } | 1236 | 369 | *nextTokPtr = ptr + MINBPC(enc); | 1237 | 369 | return XML_TOK_NAME_PLUS; | 1238 | 620 | case BT_AST: | 1239 | 620 | if (tok == XML_TOK_NMTOKEN) { | 1240 | 7 | *nextTokPtr = ptr; | 1241 | 7 | return XML_TOK_INVALID; | 1242 | 7 | } | 1243 | 613 | *nextTokPtr = ptr + MINBPC(enc); | 1244 | 613 | return XML_TOK_NAME_ASTERISK; | 1245 | 26.5k | case BT_QUEST: | 1246 | 26.5k | if (tok == XML_TOK_NMTOKEN) { | 1247 | 19 | *nextTokPtr = ptr; | 1248 | 19 | return XML_TOK_INVALID; | 1249 | 19 | } | 1250 | 26.4k | *nextTokPtr = ptr + MINBPC(enc); | 1251 | 26.4k | return XML_TOK_NAME_QUESTION; | 1252 | 604 | default: | 1253 | 604 | *nextTokPtr = ptr; | 1254 | 604 | return XML_TOK_INVALID; | 1255 | 152M | } | 1256 | 152M | } | 1257 | 11.5k | return -tok; | 1258 | 3.76M | } |
xmltok.c:little2_prologTok Line | Count | Source | 1018 | 804k | const char **nextTokPtr) { | 1019 | 804k | int tok; | 1020 | 804k | if (ptr >= end) | 1021 | 894 | return XML_TOK_NONE; | 1022 | 804k | if (MINBPC(enc) > 1) { | 1023 | 804k | size_t n = end - ptr; | 1024 | 804k | if (n & (MINBPC(enc) - 1)) { | 1025 | 56.5k | n &= ~(MINBPC(enc) - 1); | 1026 | 56.5k | if (n == 0) | 1027 | 160 | return XML_TOK_PARTIAL; | 1028 | 56.3k | end = ptr + n; | 1029 | 56.3k | } | 1030 | 804k | } | 1031 | 803k | switch (BYTE_TYPE(enc, ptr)) { | 1032 | 1.03k | case BT_QUOT: | 1033 | 1.03k | return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr); | 1034 | 22.7k | case BT_APOS: | 1035 | 22.7k | return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr); | 1036 | 69.2k | case BT_LT: { | 1037 | 69.2k | ptr += MINBPC(enc); | 1038 | 69.2k | REQUIRE_CHAR(enc, ptr, end); | 1039 | 69.1k | switch (BYTE_TYPE(enc, ptr)) { | 1040 | 33.2k | case BT_EXCL: | 1041 | 33.2k | return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 1042 | 25.1k | case BT_QUEST: | 1043 | 25.1k | return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 1044 | 3.85k | case BT_NMSTRT: | 1045 | 5.05k | case BT_HEX: | 1046 | 10.0k | case BT_NONASCII: | 1047 | 10.0k | case BT_LEAD2: | 1048 | 10.0k | case BT_LEAD3: | 1049 | 10.1k | case BT_LEAD4: | 1050 | 10.1k | *nextTokPtr = ptr - MINBPC(enc); | 1051 | 10.1k | return XML_TOK_INSTANCE_START; | 1052 | 69.1k | } | 1053 | 694 | *nextTokPtr = ptr; | 1054 | 694 | return XML_TOK_INVALID; | 1055 | 69.1k | } | 1056 | 18.2k | case BT_CR: | 1057 | 18.2k | if (ptr + MINBPC(enc) == end) { | 1058 | 109 | *nextTokPtr = end; | 1059 | | /* indicate that this might be part of a CR/LF pair */ | 1060 | 109 | return -XML_TOK_PROLOG_S; | 1061 | 109 | } | 1062 | | /* fall through */ | 1063 | 74.7k | case BT_S: | 1064 | 95.1k | case BT_LF: | 1065 | 315k | for (;;) { | 1066 | 315k | ptr += MINBPC(enc); | 1067 | 315k | if (! HAS_CHAR(enc, ptr, end)) | 1068 | 238 | break; | 1069 | 315k | switch (BYTE_TYPE(enc, ptr)) { | 1070 | 79.6k | case BT_S: | 1071 | 151k | case BT_LF: | 1072 | 151k | break; | 1073 | 69.0k | case BT_CR: | 1074 | | /* don't split CR/LF pair */ | 1075 | 69.0k | if (ptr + MINBPC(enc) != end) | 1076 | 68.9k | break; | 1077 | | /* fall through */ | 1078 | 94.9k | default: | 1079 | 94.9k | *nextTokPtr = ptr; | 1080 | 94.9k | return XML_TOK_PROLOG_S; | 1081 | 315k | } | 1082 | 315k | } | 1083 | 238 | *nextTokPtr = ptr; | 1084 | 238 | return XML_TOK_PROLOG_S; | 1085 | 4.32k | case BT_PERCNT: | 1086 | 4.32k | return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 1087 | 3.04k | case BT_COMMA: | 1088 | 3.04k | *nextTokPtr = ptr + MINBPC(enc); | 1089 | 3.04k | return XML_TOK_COMMA; | 1090 | 2.48k | case BT_LSQB: | 1091 | 2.48k | *nextTokPtr = ptr + MINBPC(enc); | 1092 | 2.48k | return XML_TOK_OPEN_BRACKET; | 1093 | 1.24k | case BT_RSQB: | 1094 | 1.24k | ptr += MINBPC(enc); | 1095 | 1.24k | if (! HAS_CHAR(enc, ptr, end)) | 1096 | 11 | return -XML_TOK_CLOSE_BRACKET; | 1097 | 1.23k | if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { | 1098 | 72 | REQUIRE_CHARS(enc, ptr, end, 2); | 1099 | 66 | if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) { | 1100 | 6 | *nextTokPtr = ptr + 2 * MINBPC(enc); | 1101 | 6 | return XML_TOK_COND_SECT_CLOSE; | 1102 | 6 | } | 1103 | 66 | } | 1104 | 1.22k | *nextTokPtr = ptr; | 1105 | 1.22k | return XML_TOK_CLOSE_BRACKET; | 1106 | 17.8k | case BT_LPAR: | 1107 | 17.8k | *nextTokPtr = ptr + MINBPC(enc); | 1108 | 17.8k | return XML_TOK_OPEN_PAREN; | 1109 | 16.3k | case BT_RPAR: | 1110 | 16.3k | ptr += MINBPC(enc); | 1111 | 16.3k | if (! HAS_CHAR(enc, ptr, end)) | 1112 | 33 | return -XML_TOK_CLOSE_PAREN; | 1113 | 16.2k | switch (BYTE_TYPE(enc, ptr)) { | 1114 | 8.91k | case BT_AST: | 1115 | 8.91k | *nextTokPtr = ptr + MINBPC(enc); | 1116 | 8.91k | return XML_TOK_CLOSE_PAREN_ASTERISK; | 1117 | 1.02k | case BT_QUEST: | 1118 | 1.02k | *nextTokPtr = ptr + MINBPC(enc); | 1119 | 1.02k | return XML_TOK_CLOSE_PAREN_QUESTION; | 1120 | 663 | case BT_PLUS: | 1121 | 663 | *nextTokPtr = ptr + MINBPC(enc); | 1122 | 663 | return XML_TOK_CLOSE_PAREN_PLUS; | 1123 | 183 | case BT_CR: | 1124 | 1.86k | case BT_LF: | 1125 | 2.54k | case BT_S: | 1126 | 2.86k | case BT_GT: | 1127 | 3.39k | case BT_COMMA: | 1128 | 4.78k | case BT_VERBAR: | 1129 | 5.60k | case BT_RPAR: | 1130 | 5.60k | *nextTokPtr = ptr; | 1131 | 5.60k | return XML_TOK_CLOSE_PAREN; | 1132 | 16.2k | } | 1133 | 57 | *nextTokPtr = ptr; | 1134 | 57 | return XML_TOK_INVALID; | 1135 | 240k | case BT_VERBAR: | 1136 | 240k | *nextTokPtr = ptr + MINBPC(enc); | 1137 | 240k | return XML_TOK_OR; | 1138 | 27.2k | case BT_GT: | 1139 | 27.2k | *nextTokPtr = ptr + MINBPC(enc); | 1140 | 27.2k | return XML_TOK_DECL_CLOSE; | 1141 | 10.6k | case BT_NUM: | 1142 | 10.6k | return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 1143 | 0 | # define LEAD_CASE(n) \ | 1144 | 0 | case BT_LEAD##n: \ | 1145 | 0 | if (end - ptr < n) \ | 1146 | 0 | return XML_TOK_PARTIAL_CHAR; \ | 1147 | 0 | if (IS_INVALID_CHAR(enc, ptr, n)) { \ | 1148 | 0 | *nextTokPtr = ptr; \ | 1149 | 0 | return XML_TOK_INVALID; \ | 1150 | 0 | } \ | 1151 | 0 | if (IS_NMSTRT_CHAR(enc, ptr, n)) { \ | 1152 | 0 | ptr += n; \ | 1153 | 0 | tok = XML_TOK_NAME; \ | 1154 | 0 | break; \ | 1155 | 0 | } \ | 1156 | 0 | if (IS_NAME_CHAR(enc, ptr, n)) { \ | 1157 | 0 | ptr += n; \ | 1158 | 0 | tok = XML_TOK_NMTOKEN; \ | 1159 | 0 | break; \ | 1160 | 0 | } \ | 1161 | 0 | *nextTokPtr = ptr; \ | 1162 | 0 | return XML_TOK_INVALID; | 1163 | 0 | LEAD_CASE(2) | 1164 | 0 | LEAD_CASE(3) | 1165 | 160 | LEAD_CASE(4) | 1166 | 0 | # undef LEAD_CASE | 1167 | 34.0k | case BT_NMSTRT: | 1168 | 49.3k | case BT_HEX: | 1169 | 49.3k | tok = XML_TOK_NAME; | 1170 | 49.3k | ptr += MINBPC(enc); | 1171 | 49.3k | break; | 1172 | 1.31k | case BT_DIGIT: | 1173 | 114k | case BT_NAME: | 1174 | 115k | case BT_MINUS: | 1175 | 115k | # ifdef XML_NS | 1176 | 120k | case BT_COLON: | 1177 | 120k | # endif | 1178 | 120k | tok = XML_TOK_NMTOKEN; | 1179 | 120k | ptr += MINBPC(enc); | 1180 | 120k | break; | 1181 | 121k | case BT_NONASCII: | 1182 | 121k | if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { | 1183 | 121k | ptr += MINBPC(enc); | 1184 | 121k | tok = XML_TOK_NAME; | 1185 | 121k | break; | 1186 | 121k | } | 1187 | 539 | if (IS_NAME_CHAR_MINBPC(enc, ptr)) { | 1188 | 309 | ptr += MINBPC(enc); | 1189 | 309 | tok = XML_TOK_NMTOKEN; | 1190 | 309 | break; | 1191 | 309 | } | 1192 | | /* fall through */ | 1193 | 325 | default: | 1194 | 325 | *nextTokPtr = ptr; | 1195 | 325 | return XML_TOK_INVALID; | 1196 | 803k | } | 1197 | 1.58M | while (HAS_CHAR(enc, ptr, end)) { | 1198 | 1.58M | switch (BYTE_TYPE(enc, ptr)) { | 1199 | 4.73M | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) | 1200 | 351 | case BT_GT: | 1201 | 9.76k | case BT_RPAR: | 1202 | 10.3k | case BT_COMMA: | 1203 | 244k | case BT_VERBAR: | 1204 | 246k | case BT_LSQB: | 1205 | 246k | case BT_PERCNT: | 1206 | 272k | case BT_S: | 1207 | 275k | case BT_CR: | 1208 | 287k | case BT_LF: | 1209 | 287k | *nextTokPtr = ptr; | 1210 | 287k | return tok; | 1211 | 0 | # ifdef XML_NS | 1212 | 211k | case BT_COLON: | 1213 | 211k | ptr += MINBPC(enc); | 1214 | 211k | switch (tok) { | 1215 | 90.1k | case XML_TOK_NAME: | 1216 | 90.1k | REQUIRE_CHAR(enc, ptr, end); | 1217 | 90.1k | tok = XML_TOK_PREFIXED_NAME; | 1218 | 90.1k | switch (BYTE_TYPE(enc, ptr)) { | 1219 | 125k | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) | 1220 | 18.5k | default: | 1221 | 18.5k | tok = XML_TOK_NMTOKEN; | 1222 | 18.5k | break; | 1223 | 90.1k | } | 1224 | 89.9k | break; | 1225 | 89.9k | case XML_TOK_PREFIXED_NAME: | 1226 | 9.33k | tok = XML_TOK_NMTOKEN; | 1227 | 9.33k | break; | 1228 | 211k | } | 1229 | 211k | break; | 1230 | 211k | # endif | 1231 | 211k | case BT_PLUS: | 1232 | 1.11k | if (tok == XML_TOK_NMTOKEN) { | 1233 | 6 | *nextTokPtr = ptr; | 1234 | 6 | return XML_TOK_INVALID; | 1235 | 6 | } | 1236 | 1.10k | *nextTokPtr = ptr + MINBPC(enc); | 1237 | 1.10k | return XML_TOK_NAME_PLUS; | 1238 | 705 | case BT_AST: | 1239 | 705 | if (tok == XML_TOK_NMTOKEN) { | 1240 | 10 | *nextTokPtr = ptr; | 1241 | 10 | return XML_TOK_INVALID; | 1242 | 10 | } | 1243 | 695 | *nextTokPtr = ptr + MINBPC(enc); | 1244 | 695 | return XML_TOK_NAME_ASTERISK; | 1245 | 198 | case BT_QUEST: | 1246 | 198 | if (tok == XML_TOK_NMTOKEN) { | 1247 | 12 | *nextTokPtr = ptr; | 1248 | 12 | return XML_TOK_INVALID; | 1249 | 12 | } | 1250 | 186 | *nextTokPtr = ptr + MINBPC(enc); | 1251 | 186 | return XML_TOK_NAME_QUESTION; | 1252 | 141 | default: | 1253 | 141 | *nextTokPtr = ptr; | 1254 | 141 | return XML_TOK_INVALID; | 1255 | 1.58M | } | 1256 | 1.58M | } | 1257 | 1.23k | return -tok; | 1258 | 291k | } |
Line | Count | Source | 1018 | 2.22M | const char **nextTokPtr) { | 1019 | 2.22M | int tok; | 1020 | 2.22M | if (ptr >= end) | 1021 | 1.39k | return XML_TOK_NONE; | 1022 | 2.22M | if (MINBPC(enc) > 1) { | 1023 | 2.22M | size_t n = end - ptr; | 1024 | 2.22M | if (n & (MINBPC(enc) - 1)) { | 1025 | 71.8k | n &= ~(MINBPC(enc) - 1); | 1026 | 71.8k | if (n == 0) | 1027 | 124 | return XML_TOK_PARTIAL; | 1028 | 71.6k | end = ptr + n; | 1029 | 71.6k | } | 1030 | 2.22M | } | 1031 | 2.22M | switch (BYTE_TYPE(enc, ptr)) { | 1032 | 2.53k | case BT_QUOT: | 1033 | 2.53k | return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr); | 1034 | 102k | case BT_APOS: | 1035 | 102k | return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr); | 1036 | 143k | case BT_LT: { | 1037 | 143k | ptr += MINBPC(enc); | 1038 | 143k | REQUIRE_CHAR(enc, ptr, end); | 1039 | 143k | switch (BYTE_TYPE(enc, ptr)) { | 1040 | 104k | case BT_EXCL: | 1041 | 104k | return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 1042 | 28.1k | case BT_QUEST: | 1043 | 28.1k | return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 1044 | 3.40k | case BT_NMSTRT: | 1045 | 5.43k | case BT_HEX: | 1046 | 10.3k | case BT_NONASCII: | 1047 | 10.3k | case BT_LEAD2: | 1048 | 10.3k | case BT_LEAD3: | 1049 | 10.3k | case BT_LEAD4: | 1050 | 10.3k | *nextTokPtr = ptr - MINBPC(enc); | 1051 | 10.3k | return XML_TOK_INSTANCE_START; | 1052 | 143k | } | 1053 | 710 | *nextTokPtr = ptr; | 1054 | 710 | return XML_TOK_INVALID; | 1055 | 143k | } | 1056 | 21.5k | case BT_CR: | 1057 | 21.5k | if (ptr + MINBPC(enc) == end) { | 1058 | 196 | *nextTokPtr = end; | 1059 | | /* indicate that this might be part of a CR/LF pair */ | 1060 | 196 | return -XML_TOK_PROLOG_S; | 1061 | 196 | } | 1062 | | /* fall through */ | 1063 | 186k | case BT_S: | 1064 | 432k | case BT_LF: | 1065 | 767k | for (;;) { | 1066 | 767k | ptr += MINBPC(enc); | 1067 | 767k | if (! HAS_CHAR(enc, ptr, end)) | 1068 | 389 | break; | 1069 | 767k | switch (BYTE_TYPE(enc, ptr)) { | 1070 | 80.4k | case BT_S: | 1071 | 144k | case BT_LF: | 1072 | 144k | break; | 1073 | 191k | case BT_CR: | 1074 | | /* don't split CR/LF pair */ | 1075 | 191k | if (ptr + MINBPC(enc) != end) | 1076 | 191k | break; | 1077 | | /* fall through */ | 1078 | 432k | default: | 1079 | 432k | *nextTokPtr = ptr; | 1080 | 432k | return XML_TOK_PROLOG_S; | 1081 | 767k | } | 1082 | 767k | } | 1083 | 389 | *nextTokPtr = ptr; | 1084 | 389 | return XML_TOK_PROLOG_S; | 1085 | 6.84k | case BT_PERCNT: | 1086 | 6.84k | return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 1087 | 128k | case BT_COMMA: | 1088 | 128k | *nextTokPtr = ptr + MINBPC(enc); | 1089 | 128k | return XML_TOK_COMMA; | 1090 | 2.93k | case BT_LSQB: | 1091 | 2.93k | *nextTokPtr = ptr + MINBPC(enc); | 1092 | 2.93k | return XML_TOK_OPEN_BRACKET; | 1093 | 1.72k | case BT_RSQB: | 1094 | 1.72k | ptr += MINBPC(enc); | 1095 | 1.72k | if (! HAS_CHAR(enc, ptr, end)) | 1096 | 19 | return -XML_TOK_CLOSE_BRACKET; | 1097 | 1.70k | if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { | 1098 | 91 | REQUIRE_CHARS(enc, ptr, end, 2); | 1099 | 83 | if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) { | 1100 | 9 | *nextTokPtr = ptr + 2 * MINBPC(enc); | 1101 | 9 | return XML_TOK_COND_SECT_CLOSE; | 1102 | 9 | } | 1103 | 83 | } | 1104 | 1.69k | *nextTokPtr = ptr; | 1105 | 1.69k | return XML_TOK_CLOSE_BRACKET; | 1106 | 225k | case BT_LPAR: | 1107 | 225k | *nextTokPtr = ptr + MINBPC(enc); | 1108 | 225k | return XML_TOK_OPEN_PAREN; | 1109 | 168k | case BT_RPAR: | 1110 | 168k | ptr += MINBPC(enc); | 1111 | 168k | if (! HAS_CHAR(enc, ptr, end)) | 1112 | 86 | return -XML_TOK_CLOSE_PAREN; | 1113 | 168k | switch (BYTE_TYPE(enc, ptr)) { | 1114 | 1.62k | case BT_AST: | 1115 | 1.62k | *nextTokPtr = ptr + MINBPC(enc); | 1116 | 1.62k | return XML_TOK_CLOSE_PAREN_ASTERISK; | 1117 | 920 | case BT_QUEST: | 1118 | 920 | *nextTokPtr = ptr + MINBPC(enc); | 1119 | 920 | return XML_TOK_CLOSE_PAREN_QUESTION; | 1120 | 52.7k | case BT_PLUS: | 1121 | 52.7k | *nextTokPtr = ptr + MINBPC(enc); | 1122 | 52.7k | return XML_TOK_CLOSE_PAREN_PLUS; | 1123 | 126 | case BT_CR: | 1124 | 831 | case BT_LF: | 1125 | 79.4k | case BT_S: | 1126 | 80.8k | case BT_GT: | 1127 | 108k | case BT_COMMA: | 1128 | 108k | case BT_VERBAR: | 1129 | 113k | case BT_RPAR: | 1130 | 113k | *nextTokPtr = ptr; | 1131 | 113k | return XML_TOK_CLOSE_PAREN; | 1132 | 168k | } | 1133 | 66 | *nextTokPtr = ptr; | 1134 | 66 | return XML_TOK_INVALID; | 1135 | 285k | case BT_VERBAR: | 1136 | 285k | *nextTokPtr = ptr + MINBPC(enc); | 1137 | 285k | return XML_TOK_OR; | 1138 | 99.2k | case BT_GT: | 1139 | 99.2k | *nextTokPtr = ptr + MINBPC(enc); | 1140 | 99.2k | return XML_TOK_DECL_CLOSE; | 1141 | 4.70k | case BT_NUM: | 1142 | 4.70k | return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 1143 | 0 | # define LEAD_CASE(n) \ | 1144 | 0 | case BT_LEAD##n: \ | 1145 | 0 | if (end - ptr < n) \ | 1146 | 0 | return XML_TOK_PARTIAL_CHAR; \ | 1147 | 0 | if (IS_INVALID_CHAR(enc, ptr, n)) { \ | 1148 | 0 | *nextTokPtr = ptr; \ | 1149 | 0 | return XML_TOK_INVALID; \ | 1150 | 0 | } \ | 1151 | 0 | if (IS_NMSTRT_CHAR(enc, ptr, n)) { \ | 1152 | 0 | ptr += n; \ | 1153 | 0 | tok = XML_TOK_NAME; \ | 1154 | 0 | break; \ | 1155 | 0 | } \ | 1156 | 0 | if (IS_NAME_CHAR(enc, ptr, n)) { \ | 1157 | 0 | ptr += n; \ | 1158 | 0 | tok = XML_TOK_NMTOKEN; \ | 1159 | 0 | break; \ | 1160 | 0 | } \ | 1161 | 0 | *nextTokPtr = ptr; \ | 1162 | 0 | return XML_TOK_INVALID; | 1163 | 0 | LEAD_CASE(2) | 1164 | 0 | LEAD_CASE(3) | 1165 | 231 | LEAD_CASE(4) | 1166 | 0 | # undef LEAD_CASE | 1167 | 202k | case BT_NMSTRT: | 1168 | 329k | case BT_HEX: | 1169 | 329k | tok = XML_TOK_NAME; | 1170 | 329k | ptr += MINBPC(enc); | 1171 | 329k | break; | 1172 | 12.1k | case BT_DIGIT: | 1173 | 12.7k | case BT_NAME: | 1174 | 12.8k | case BT_MINUS: | 1175 | 12.8k | # ifdef XML_NS | 1176 | 70.7k | case BT_COLON: | 1177 | 70.7k | # endif | 1178 | 70.7k | tok = XML_TOK_NMTOKEN; | 1179 | 70.7k | ptr += MINBPC(enc); | 1180 | 70.7k | break; | 1181 | 215k | case BT_NONASCII: | 1182 | 215k | if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { | 1183 | 215k | ptr += MINBPC(enc); | 1184 | 215k | tok = XML_TOK_NAME; | 1185 | 215k | break; | 1186 | 215k | } | 1187 | 502 | if (IS_NAME_CHAR_MINBPC(enc, ptr)) { | 1188 | 367 | ptr += MINBPC(enc); | 1189 | 367 | tok = XML_TOK_NMTOKEN; | 1190 | 367 | break; | 1191 | 367 | } | 1192 | | /* fall through */ | 1193 | 265 | default: | 1194 | 265 | *nextTokPtr = ptr; | 1195 | 265 | return XML_TOK_INVALID; | 1196 | 2.22M | } | 1197 | 4.64M | while (HAS_CHAR(enc, ptr, end)) { | 1198 | 4.64M | switch (BYTE_TYPE(enc, ptr)) { | 1199 | 18.1M | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) | 1200 | 516 | case BT_GT: | 1201 | 17.9k | case BT_RPAR: | 1202 | 20.2k | case BT_COMMA: | 1203 | 303k | case BT_VERBAR: | 1204 | 306k | case BT_LSQB: | 1205 | 306k | case BT_PERCNT: | 1206 | 343k | case BT_S: | 1207 | 346k | case BT_CR: | 1208 | 516k | case BT_LF: | 1209 | 516k | *nextTokPtr = ptr; | 1210 | 516k | return tok; | 1211 | 0 | # ifdef XML_NS | 1212 | 223k | case BT_COLON: | 1213 | 223k | ptr += MINBPC(enc); | 1214 | 223k | switch (tok) { | 1215 | 152k | case XML_TOK_NAME: | 1216 | 152k | REQUIRE_CHAR(enc, ptr, end); | 1217 | 151k | tok = XML_TOK_PREFIXED_NAME; | 1218 | 151k | switch (BYTE_TYPE(enc, ptr)) { | 1219 | 421k | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) | 1220 | 38.5k | default: | 1221 | 38.5k | tok = XML_TOK_NMTOKEN; | 1222 | 38.5k | break; | 1223 | 151k | } | 1224 | 151k | break; | 1225 | 151k | case XML_TOK_PREFIXED_NAME: | 1226 | 58.4k | tok = XML_TOK_NMTOKEN; | 1227 | 58.4k | break; | 1228 | 223k | } | 1229 | 223k | break; | 1230 | 223k | # endif | 1231 | 223k | case BT_PLUS: | 1232 | 96.3k | if (tok == XML_TOK_NMTOKEN) { | 1233 | 8 | *nextTokPtr = ptr; | 1234 | 8 | return XML_TOK_INVALID; | 1235 | 8 | } | 1236 | 96.3k | *nextTokPtr = ptr + MINBPC(enc); | 1237 | 96.3k | return XML_TOK_NAME_PLUS; | 1238 | 414 | case BT_AST: | 1239 | 414 | if (tok == XML_TOK_NMTOKEN) { | 1240 | 9 | *nextTokPtr = ptr; | 1241 | 9 | return XML_TOK_INVALID; | 1242 | 9 | } | 1243 | 405 | *nextTokPtr = ptr + MINBPC(enc); | 1244 | 405 | return XML_TOK_NAME_ASTERISK; | 1245 | 835 | case BT_QUEST: | 1246 | 835 | if (tok == XML_TOK_NMTOKEN) { | 1247 | 7 | *nextTokPtr = ptr; | 1248 | 7 | return XML_TOK_INVALID; | 1249 | 7 | } | 1250 | 828 | *nextTokPtr = ptr + MINBPC(enc); | 1251 | 828 | return XML_TOK_NAME_QUESTION; | 1252 | 156 | default: | 1253 | 156 | *nextTokPtr = ptr; | 1254 | 156 | return XML_TOK_INVALID; | 1255 | 4.64M | } | 1256 | 4.64M | } | 1257 | 1.66k | return -tok; | 1258 | 616k | } |
|
1259 | | |
1260 | | static int PTRCALL |
1261 | | PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end, |
1262 | 89.9M | const char **nextTokPtr) { |
1263 | 89.9M | const char *start; |
1264 | 89.9M | if (ptr >= end) |
1265 | 2.40M | return XML_TOK_NONE; |
1266 | 87.5M | else if (! HAS_CHAR(enc, ptr, end)) { |
1267 | | /* This line cannot be executed. The incoming data has already |
1268 | | * been tokenized once, so incomplete characters like this have |
1269 | | * already been eliminated from the input. Retaining the paranoia |
1270 | | * check is still valuable, however. |
1271 | | */ |
1272 | 13 | return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */ |
1273 | 13 | } |
1274 | 87.5M | start = ptr; |
1275 | 4.29G | while (HAS_CHAR(enc, ptr, end)) { |
1276 | 4.29G | switch (BYTE_TYPE(enc, ptr)) { |
1277 | 0 | # define LEAD_CASE(n) \ |
1278 | 3.07G | case BT_LEAD##n: \ |
1279 | 3.07G | ptr += n; /* NOTE: The encoding has already been validated. */ \ |
1280 | 3.07G | break; |
1281 | 1.06G | LEAD_CASE(2) |
1282 | 1.99G | LEAD_CASE(3) |
1283 | 4.87M | LEAD_CASE(4) |
1284 | 0 | # undef LEAD_CASE |
1285 | 4.08M | case BT_AMP: |
1286 | 4.08M | if (ptr == start) |
1287 | 2.94M | return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
1288 | 1.14M | *nextTokPtr = ptr; |
1289 | 1.14M | return XML_TOK_DATA_CHARS; |
1290 | 46 | case BT_LT: |
1291 | | /* this is for inside entity references */ |
1292 | 46 | *nextTokPtr = ptr; |
1293 | 46 | return XML_TOK_INVALID; |
1294 | 62.1M | case BT_LF: |
1295 | 62.1M | if (ptr == start) { |
1296 | 42.8M | *nextTokPtr = ptr + MINBPC(enc); |
1297 | 42.8M | return XML_TOK_DATA_NEWLINE; |
1298 | 42.8M | } |
1299 | 19.3M | *nextTokPtr = ptr; |
1300 | 19.3M | return XML_TOK_DATA_CHARS; |
1301 | 3.14M | case BT_CR: |
1302 | 3.14M | if (ptr == start) { |
1303 | 3.02M | ptr += MINBPC(enc); |
1304 | 3.02M | if (! HAS_CHAR(enc, ptr, end)) |
1305 | 1.89k | return XML_TOK_TRAILING_CR; |
1306 | 3.02M | if (BYTE_TYPE(enc, ptr) == BT_LF) |
1307 | 389k | ptr += MINBPC(enc); |
1308 | 3.02M | *nextTokPtr = ptr; |
1309 | 3.02M | return XML_TOK_DATA_NEWLINE; |
1310 | 3.02M | } |
1311 | 120k | *nextTokPtr = ptr; |
1312 | 120k | return XML_TOK_DATA_CHARS; |
1313 | 15.8M | case BT_S: |
1314 | 15.8M | if (ptr == start) { |
1315 | 10.9M | *nextTokPtr = ptr + MINBPC(enc); |
1316 | 10.9M | return XML_TOK_ATTRIBUTE_VALUE_S; |
1317 | 10.9M | } |
1318 | 4.88M | *nextTokPtr = ptr; |
1319 | 4.88M | return XML_TOK_DATA_CHARS; |
1320 | 1.13G | default: |
1321 | 1.13G | ptr += MINBPC(enc); |
1322 | 1.13G | break; |
1323 | 4.29G | } |
1324 | 4.29G | } |
1325 | 2.36M | *nextTokPtr = ptr; |
1326 | 2.36M | return XML_TOK_DATA_CHARS; |
1327 | 87.5M | } xmltok.c:normal_attributeValueTok Line | Count | Source | 1262 | 88.9M | const char **nextTokPtr) { | 1263 | 88.9M | const char *start; | 1264 | 88.9M | if (ptr >= end) | 1265 | 2.39M | return XML_TOK_NONE; | 1266 | 86.5M | else if (! HAS_CHAR(enc, ptr, end)) { | 1267 | | /* This line cannot be executed. The incoming data has already | 1268 | | * been tokenized once, so incomplete characters like this have | 1269 | | * already been eliminated from the input. Retaining the paranoia | 1270 | | * check is still valuable, however. | 1271 | | */ | 1272 | 0 | return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */ | 1273 | 0 | } | 1274 | 86.5M | start = ptr; | 1275 | 4.27G | while (HAS_CHAR(enc, ptr, end)) { | 1276 | 4.27G | switch (BYTE_TYPE(enc, ptr)) { | 1277 | 0 | # define LEAD_CASE(n) \ | 1278 | 0 | case BT_LEAD##n: \ | 1279 | 0 | ptr += n; /* NOTE: The encoding has already been validated. */ \ | 1280 | 0 | break; | 1281 | 1.06G | LEAD_CASE(2) | 1282 | 1.99G | LEAD_CASE(3) | 1283 | 4.60M | LEAD_CASE(4) | 1284 | 0 | # undef LEAD_CASE | 1285 | 3.93M | case BT_AMP: | 1286 | 3.93M | if (ptr == start) | 1287 | 2.83M | return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 1288 | 1.09M | *nextTokPtr = ptr; | 1289 | 1.09M | return XML_TOK_DATA_CHARS; | 1290 | 29 | case BT_LT: | 1291 | | /* this is for inside entity references */ | 1292 | 29 | *nextTokPtr = ptr; | 1293 | 29 | return XML_TOK_INVALID; | 1294 | 61.9M | case BT_LF: | 1295 | 61.9M | if (ptr == start) { | 1296 | 42.7M | *nextTokPtr = ptr + MINBPC(enc); | 1297 | 42.7M | return XML_TOK_DATA_NEWLINE; | 1298 | 42.7M | } | 1299 | 19.1M | *nextTokPtr = ptr; | 1300 | 19.1M | return XML_TOK_DATA_CHARS; | 1301 | 2.53M | case BT_CR: | 1302 | 2.53M | if (ptr == start) { | 1303 | 2.51M | ptr += MINBPC(enc); | 1304 | 2.51M | if (! HAS_CHAR(enc, ptr, end)) | 1305 | 692 | return XML_TOK_TRAILING_CR; | 1306 | 2.51M | if (BYTE_TYPE(enc, ptr) == BT_LF) | 1307 | 1.82k | ptr += MINBPC(enc); | 1308 | 2.51M | *nextTokPtr = ptr; | 1309 | 2.51M | return XML_TOK_DATA_NEWLINE; | 1310 | 2.51M | } | 1311 | 21.8k | *nextTokPtr = ptr; | 1312 | 21.8k | return XML_TOK_DATA_CHARS; | 1313 | 15.7M | case BT_S: | 1314 | 15.7M | if (ptr == start) { | 1315 | 10.9M | *nextTokPtr = ptr + MINBPC(enc); | 1316 | 10.9M | return XML_TOK_ATTRIBUTE_VALUE_S; | 1317 | 10.9M | } | 1318 | 4.85M | *nextTokPtr = ptr; | 1319 | 4.85M | return XML_TOK_DATA_CHARS; | 1320 | 1.11G | default: | 1321 | 1.11G | ptr += MINBPC(enc); | 1322 | 1.11G | break; | 1323 | 4.27G | } | 1324 | 4.27G | } | 1325 | 2.35M | *nextTokPtr = ptr; | 1326 | 2.35M | return XML_TOK_DATA_CHARS; | 1327 | 86.5M | } |
xmltok.c:little2_attributeValueTok Line | Count | Source | 1262 | 186k | const char **nextTokPtr) { | 1263 | 186k | const char *start; | 1264 | 186k | if (ptr >= end) | 1265 | 4.60k | return XML_TOK_NONE; | 1266 | 181k | else if (! HAS_CHAR(enc, ptr, end)) { | 1267 | | /* This line cannot be executed. The incoming data has already | 1268 | | * been tokenized once, so incomplete characters like this have | 1269 | | * already been eliminated from the input. Retaining the paranoia | 1270 | | * check is still valuable, however. | 1271 | | */ | 1272 | 5 | return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */ | 1273 | 5 | } | 1274 | 181k | start = ptr; | 1275 | 5.72M | while (HAS_CHAR(enc, ptr, end)) { | 1276 | 5.72M | switch (BYTE_TYPE(enc, ptr)) { | 1277 | 0 | # define LEAD_CASE(n) \ | 1278 | 0 | case BT_LEAD##n: \ | 1279 | 0 | ptr += n; /* NOTE: The encoding has already been validated. */ \ | 1280 | 0 | break; | 1281 | 0 | LEAD_CASE(2) | 1282 | 0 | LEAD_CASE(3) | 1283 | 157k | LEAD_CASE(4) | 1284 | 0 | # undef LEAD_CASE | 1285 | 26.6k | case BT_AMP: | 1286 | 26.6k | if (ptr == start) | 1287 | 16.2k | return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 1288 | 10.3k | *nextTokPtr = ptr; | 1289 | 10.3k | return XML_TOK_DATA_CHARS; | 1290 | 9 | case BT_LT: | 1291 | | /* this is for inside entity references */ | 1292 | 9 | *nextTokPtr = ptr; | 1293 | 9 | return XML_TOK_INVALID; | 1294 | 20.3k | case BT_LF: | 1295 | 20.3k | if (ptr == start) { | 1296 | 10.5k | *nextTokPtr = ptr + MINBPC(enc); | 1297 | 10.5k | return XML_TOK_DATA_NEWLINE; | 1298 | 10.5k | } | 1299 | 9.76k | *nextTokPtr = ptr; | 1300 | 9.76k | return XML_TOK_DATA_CHARS; | 1301 | 108k | case BT_CR: | 1302 | 108k | if (ptr == start) { | 1303 | 74.1k | ptr += MINBPC(enc); | 1304 | 74.1k | if (! HAS_CHAR(enc, ptr, end)) | 1305 | 659 | return XML_TOK_TRAILING_CR; | 1306 | 73.5k | if (BYTE_TYPE(enc, ptr) == BT_LF) | 1307 | 14.7k | ptr += MINBPC(enc); | 1308 | 73.5k | *nextTokPtr = ptr; | 1309 | 73.5k | return XML_TOK_DATA_NEWLINE; | 1310 | 74.1k | } | 1311 | 34.5k | *nextTokPtr = ptr; | 1312 | 34.5k | return XML_TOK_DATA_CHARS; | 1313 | 24.1k | case BT_S: | 1314 | 24.1k | if (ptr == start) { | 1315 | 14.5k | *nextTokPtr = ptr + MINBPC(enc); | 1316 | 14.5k | return XML_TOK_ATTRIBUTE_VALUE_S; | 1317 | 14.5k | } | 1318 | 9.56k | *nextTokPtr = ptr; | 1319 | 9.56k | return XML_TOK_DATA_CHARS; | 1320 | 5.38M | default: | 1321 | 5.38M | ptr += MINBPC(enc); | 1322 | 5.38M | break; | 1323 | 5.72M | } | 1324 | 5.72M | } | 1325 | 1.74k | *nextTokPtr = ptr; | 1326 | 1.74k | return XML_TOK_DATA_CHARS; | 1327 | 181k | } |
xmltok.c:big2_attributeValueTok Line | Count | Source | 1262 | 876k | const char **nextTokPtr) { | 1263 | 876k | const char *start; | 1264 | 876k | if (ptr >= end) | 1265 | 7.43k | return XML_TOK_NONE; | 1266 | 869k | else if (! HAS_CHAR(enc, ptr, end)) { | 1267 | | /* This line cannot be executed. The incoming data has already | 1268 | | * been tokenized once, so incomplete characters like this have | 1269 | | * already been eliminated from the input. Retaining the paranoia | 1270 | | * check is still valuable, however. | 1271 | | */ | 1272 | 8 | return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */ | 1273 | 8 | } | 1274 | 869k | start = ptr; | 1275 | 17.9M | while (HAS_CHAR(enc, ptr, end)) { | 1276 | 17.9M | switch (BYTE_TYPE(enc, ptr)) { | 1277 | 0 | # define LEAD_CASE(n) \ | 1278 | 0 | case BT_LEAD##n: \ | 1279 | 0 | ptr += n; /* NOTE: The encoding has already been validated. */ \ | 1280 | 0 | break; | 1281 | 0 | LEAD_CASE(2) | 1282 | 0 | LEAD_CASE(3) | 1283 | 111k | LEAD_CASE(4) | 1284 | 0 | # undef LEAD_CASE | 1285 | 125k | case BT_AMP: | 1286 | 125k | if (ptr == start) | 1287 | 92.6k | return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 1288 | 32.5k | *nextTokPtr = ptr; | 1289 | 32.5k | return XML_TOK_DATA_CHARS; | 1290 | 8 | case BT_LT: | 1291 | | /* this is for inside entity references */ | 1292 | 8 | *nextTokPtr = ptr; | 1293 | 8 | return XML_TOK_INVALID; | 1294 | 201k | case BT_LF: | 1295 | 201k | if (ptr == start) { | 1296 | 106k | *nextTokPtr = ptr + MINBPC(enc); | 1297 | 106k | return XML_TOK_DATA_NEWLINE; | 1298 | 106k | } | 1299 | 94.9k | *nextTokPtr = ptr; | 1300 | 94.9k | return XML_TOK_DATA_CHARS; | 1301 | 496k | case BT_CR: | 1302 | 496k | if (ptr == start) { | 1303 | 432k | ptr += MINBPC(enc); | 1304 | 432k | if (! HAS_CHAR(enc, ptr, end)) | 1305 | 547 | return XML_TOK_TRAILING_CR; | 1306 | 432k | if (BYTE_TYPE(enc, ptr) == BT_LF) | 1307 | 373k | ptr += MINBPC(enc); | 1308 | 432k | *nextTokPtr = ptr; | 1309 | 432k | return XML_TOK_DATA_NEWLINE; | 1310 | 432k | } | 1311 | 64.2k | *nextTokPtr = ptr; | 1312 | 64.2k | return XML_TOK_DATA_CHARS; | 1313 | 42.1k | case BT_S: | 1314 | 42.1k | if (ptr == start) { | 1315 | 24.3k | *nextTokPtr = ptr + MINBPC(enc); | 1316 | 24.3k | return XML_TOK_ATTRIBUTE_VALUE_S; | 1317 | 24.3k | } | 1318 | 17.8k | *nextTokPtr = ptr; | 1319 | 17.8k | return XML_TOK_DATA_CHARS; | 1320 | 17.0M | default: | 1321 | 17.0M | ptr += MINBPC(enc); | 1322 | 17.0M | break; | 1323 | 17.9M | } | 1324 | 17.9M | } | 1325 | 3.50k | *nextTokPtr = ptr; | 1326 | 3.50k | return XML_TOK_DATA_CHARS; | 1327 | 869k | } |
|
1328 | | |
1329 | | static int PTRCALL |
1330 | | PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end, |
1331 | 3.62M | const char **nextTokPtr) { |
1332 | 3.62M | const char *start; |
1333 | 3.62M | if (ptr >= end) |
1334 | 34.8k | return XML_TOK_NONE; |
1335 | 3.58M | else if (! HAS_CHAR(enc, ptr, end)) { |
1336 | | /* This line cannot be executed. The incoming data has already |
1337 | | * been tokenized once, so incomplete characters like this have |
1338 | | * already been eliminated from the input. Retaining the paranoia |
1339 | | * check is still valuable, however. |
1340 | | */ |
1341 | 98 | return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */ |
1342 | 98 | } |
1343 | 3.58M | start = ptr; |
1344 | 284M | while (HAS_CHAR(enc, ptr, end)) { |
1345 | 284M | switch (BYTE_TYPE(enc, ptr)) { |
1346 | 0 | # define LEAD_CASE(n) \ |
1347 | 3.46M | case BT_LEAD##n: \ |
1348 | 3.46M | ptr += n; /* NOTE: The encoding has already been validated. */ \ |
1349 | 3.46M | break; |
1350 | 144k | LEAD_CASE(2) |
1351 | 30.7k | LEAD_CASE(3) |
1352 | 3.28M | LEAD_CASE(4) |
1353 | 0 | # undef LEAD_CASE |
1354 | 585k | case BT_AMP: |
1355 | 585k | if (ptr == start) |
1356 | 314k | return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
1357 | 271k | *nextTokPtr = ptr; |
1358 | 271k | return XML_TOK_DATA_CHARS; |
1359 | 1.55k | case BT_PERCNT: |
1360 | 1.55k | if (ptr == start) { |
1361 | 852 | int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
1362 | 852 | return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok; |
1363 | 852 | } |
1364 | 705 | *nextTokPtr = ptr; |
1365 | 705 | return XML_TOK_DATA_CHARS; |
1366 | 1.00M | case BT_LF: |
1367 | 1.00M | if (ptr == start) { |
1368 | 577k | *nextTokPtr = ptr + MINBPC(enc); |
1369 | 577k | return XML_TOK_DATA_NEWLINE; |
1370 | 577k | } |
1371 | 425k | *nextTokPtr = ptr; |
1372 | 425k | return XML_TOK_DATA_CHARS; |
1373 | 1.97M | case BT_CR: |
1374 | 1.97M | if (ptr == start) { |
1375 | 1.08M | ptr += MINBPC(enc); |
1376 | 1.08M | if (! HAS_CHAR(enc, ptr, end)) |
1377 | 7.65k | return XML_TOK_TRAILING_CR; |
1378 | 1.07M | if (BYTE_TYPE(enc, ptr) == BT_LF) |
1379 | 93.6k | ptr += MINBPC(enc); |
1380 | 1.07M | *nextTokPtr = ptr; |
1381 | 1.07M | return XML_TOK_DATA_NEWLINE; |
1382 | 1.08M | } |
1383 | 895k | *nextTokPtr = ptr; |
1384 | 895k | return XML_TOK_DATA_CHARS; |
1385 | 277M | default: |
1386 | 277M | ptr += MINBPC(enc); |
1387 | 277M | break; |
1388 | 284M | } |
1389 | 284M | } |
1390 | 20.4k | *nextTokPtr = ptr; |
1391 | 20.4k | return XML_TOK_DATA_CHARS; |
1392 | 3.58M | } xmltok.c:normal_entityValueTok Line | Count | Source | 1331 | 1.76M | const char **nextTokPtr) { | 1332 | 1.76M | const char *start; | 1333 | 1.76M | if (ptr >= end) | 1334 | 19.2k | return XML_TOK_NONE; | 1335 | 1.74M | else if (! HAS_CHAR(enc, ptr, end)) { | 1336 | | /* This line cannot be executed. The incoming data has already | 1337 | | * been tokenized once, so incomplete characters like this have | 1338 | | * already been eliminated from the input. Retaining the paranoia | 1339 | | * check is still valuable, however. | 1340 | | */ | 1341 | 0 | return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */ | 1342 | 0 | } | 1343 | 1.74M | start = ptr; | 1344 | 184M | while (HAS_CHAR(enc, ptr, end)) { | 1345 | 184M | switch (BYTE_TYPE(enc, ptr)) { | 1346 | 0 | # define LEAD_CASE(n) \ | 1347 | 0 | case BT_LEAD##n: \ | 1348 | 0 | ptr += n; /* NOTE: The encoding has already been validated. */ \ | 1349 | 0 | break; | 1350 | 144k | LEAD_CASE(2) | 1351 | 30.7k | LEAD_CASE(3) | 1352 | 424 | LEAD_CASE(4) | 1353 | 0 | # undef LEAD_CASE | 1354 | 511k | case BT_AMP: | 1355 | 511k | if (ptr == start) | 1356 | 272k | return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 1357 | 238k | *nextTokPtr = ptr; | 1358 | 238k | return XML_TOK_DATA_CHARS; | 1359 | 1.47k | case BT_PERCNT: | 1360 | 1.47k | if (ptr == start) { | 1361 | 804 | int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 1362 | 804 | return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok; | 1363 | 804 | } | 1364 | 673 | *nextTokPtr = ptr; | 1365 | 673 | return XML_TOK_DATA_CHARS; | 1366 | 352k | case BT_LF: | 1367 | 352k | if (ptr == start) { | 1368 | 234k | *nextTokPtr = ptr + MINBPC(enc); | 1369 | 234k | return XML_TOK_DATA_NEWLINE; | 1370 | 234k | } | 1371 | 118k | *nextTokPtr = ptr; | 1372 | 118k | return XML_TOK_DATA_CHARS; | 1373 | 867k | case BT_CR: | 1374 | 867k | if (ptr == start) { | 1375 | 454k | ptr += MINBPC(enc); | 1376 | 454k | if (! HAS_CHAR(enc, ptr, end)) | 1377 | 1.00k | return XML_TOK_TRAILING_CR; | 1378 | 453k | if (BYTE_TYPE(enc, ptr) == BT_LF) | 1379 | 1.82k | ptr += MINBPC(enc); | 1380 | 453k | *nextTokPtr = ptr; | 1381 | 453k | return XML_TOK_DATA_NEWLINE; | 1382 | 454k | } | 1383 | 413k | *nextTokPtr = ptr; | 1384 | 413k | return XML_TOK_DATA_CHARS; | 1385 | 182M | default: | 1386 | 182M | ptr += MINBPC(enc); | 1387 | 182M | break; | 1388 | 184M | } | 1389 | 184M | } | 1390 | 11.4k | *nextTokPtr = ptr; | 1391 | 11.4k | return XML_TOK_DATA_CHARS; | 1392 | 1.74M | } |
xmltok.c:little2_entityValueTok Line | Count | Source | 1331 | 1.01M | const char **nextTokPtr) { | 1332 | 1.01M | const char *start; | 1333 | 1.01M | if (ptr >= end) | 1334 | 8.25k | return XML_TOK_NONE; | 1335 | 1.00M | else if (! HAS_CHAR(enc, ptr, end)) { | 1336 | | /* This line cannot be executed. The incoming data has already | 1337 | | * been tokenized once, so incomplete characters like this have | 1338 | | * already been eliminated from the input. Retaining the paranoia | 1339 | | * check is still valuable, however. | 1340 | | */ | 1341 | 52 | return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */ | 1342 | 52 | } | 1343 | 1.00M | start = ptr; | 1344 | 46.7M | while (HAS_CHAR(enc, ptr, end)) { | 1345 | 46.7M | switch (BYTE_TYPE(enc, ptr)) { | 1346 | 0 | # define LEAD_CASE(n) \ | 1347 | 0 | case BT_LEAD##n: \ | 1348 | 0 | ptr += n; /* NOTE: The encoding has already been validated. */ \ | 1349 | 0 | break; | 1350 | 0 | LEAD_CASE(2) | 1351 | 0 | LEAD_CASE(3) | 1352 | 1.34M | LEAD_CASE(4) | 1353 | 0 | # undef LEAD_CASE | 1354 | 39.0k | case BT_AMP: | 1355 | 39.0k | if (ptr == start) | 1356 | 23.4k | return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 1357 | 15.6k | *nextTokPtr = ptr; | 1358 | 15.6k | return XML_TOK_DATA_CHARS; | 1359 | 29 | case BT_PERCNT: | 1360 | 29 | if (ptr == start) { | 1361 | 16 | int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 1362 | 16 | return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok; | 1363 | 16 | } | 1364 | 13 | *nextTokPtr = ptr; | 1365 | 13 | return XML_TOK_DATA_CHARS; | 1366 | 430k | case BT_LF: | 1367 | 430k | if (ptr == start) { | 1368 | 221k | *nextTokPtr = ptr + MINBPC(enc); | 1369 | 221k | return XML_TOK_DATA_NEWLINE; | 1370 | 221k | } | 1371 | 209k | *nextTokPtr = ptr; | 1372 | 209k | return XML_TOK_DATA_CHARS; | 1373 | 531k | case BT_CR: | 1374 | 531k | if (ptr == start) { | 1375 | 286k | ptr += MINBPC(enc); | 1376 | 286k | if (! HAS_CHAR(enc, ptr, end)) | 1377 | 4.73k | return XML_TOK_TRAILING_CR; | 1378 | 281k | if (BYTE_TYPE(enc, ptr) == BT_LF) | 1379 | 3.34k | ptr += MINBPC(enc); | 1380 | 281k | *nextTokPtr = ptr; | 1381 | 281k | return XML_TOK_DATA_NEWLINE; | 1382 | 286k | } | 1383 | 245k | *nextTokPtr = ptr; | 1384 | 245k | return XML_TOK_DATA_CHARS; | 1385 | 44.4M | default: | 1386 | 44.4M | ptr += MINBPC(enc); | 1387 | 44.4M | break; | 1388 | 46.7M | } | 1389 | 46.7M | } | 1390 | 3.51k | *nextTokPtr = ptr; | 1391 | 3.51k | return XML_TOK_DATA_CHARS; | 1392 | 1.00M | } |
xmltok.c:big2_entityValueTok Line | Count | Source | 1331 | 844k | const char **nextTokPtr) { | 1332 | 844k | const char *start; | 1333 | 844k | if (ptr >= end) | 1334 | 7.37k | return XML_TOK_NONE; | 1335 | 837k | else if (! HAS_CHAR(enc, ptr, end)) { | 1336 | | /* This line cannot be executed. The incoming data has already | 1337 | | * been tokenized once, so incomplete characters like this have | 1338 | | * already been eliminated from the input. Retaining the paranoia | 1339 | | * check is still valuable, however. | 1340 | | */ | 1341 | 46 | return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */ | 1342 | 46 | } | 1343 | 837k | start = ptr; | 1344 | 53.1M | while (HAS_CHAR(enc, ptr, end)) { | 1345 | 53.1M | switch (BYTE_TYPE(enc, ptr)) { | 1346 | 0 | # define LEAD_CASE(n) \ | 1347 | 0 | case BT_LEAD##n: \ | 1348 | 0 | ptr += n; /* NOTE: The encoding has already been validated. */ \ | 1349 | 0 | break; | 1350 | 0 | LEAD_CASE(2) | 1351 | 0 | LEAD_CASE(3) | 1352 | 1.94M | LEAD_CASE(4) | 1353 | 0 | # undef LEAD_CASE | 1354 | 35.1k | case BT_AMP: | 1355 | 35.1k | if (ptr == start) | 1356 | 18.5k | return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 1357 | 16.6k | *nextTokPtr = ptr; | 1358 | 16.6k | return XML_TOK_DATA_CHARS; | 1359 | 51 | case BT_PERCNT: | 1360 | 51 | if (ptr == start) { | 1361 | 32 | int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr); | 1362 | 32 | return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok; | 1363 | 32 | } | 1364 | 19 | *nextTokPtr = ptr; | 1365 | 19 | return XML_TOK_DATA_CHARS; | 1366 | 219k | case BT_LF: | 1367 | 219k | if (ptr == start) { | 1368 | 121k | *nextTokPtr = ptr + MINBPC(enc); | 1369 | 121k | return XML_TOK_DATA_NEWLINE; | 1370 | 121k | } | 1371 | 97.6k | *nextTokPtr = ptr; | 1372 | 97.6k | return XML_TOK_DATA_CHARS; | 1373 | 577k | case BT_CR: | 1374 | 577k | if (ptr == start) { | 1375 | 339k | ptr += MINBPC(enc); | 1376 | 339k | if (! HAS_CHAR(enc, ptr, end)) | 1377 | 1.90k | return XML_TOK_TRAILING_CR; | 1378 | 337k | if (BYTE_TYPE(enc, ptr) == BT_LF) | 1379 | 88.5k | ptr += MINBPC(enc); | 1380 | 337k | *nextTokPtr = ptr; | 1381 | 337k | return XML_TOK_DATA_NEWLINE; | 1382 | 339k | } | 1383 | 237k | *nextTokPtr = ptr; | 1384 | 237k | return XML_TOK_DATA_CHARS; | 1385 | 50.3M | default: | 1386 | 50.3M | ptr += MINBPC(enc); | 1387 | 50.3M | break; | 1388 | 53.1M | } | 1389 | 53.1M | } | 1390 | 5.46k | *nextTokPtr = ptr; | 1391 | 5.46k | return XML_TOK_DATA_CHARS; | 1392 | 837k | } |
|
1393 | | |
1394 | | # ifdef XML_DTD |
1395 | | |
1396 | | static int PTRCALL |
1397 | | PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *end, |
1398 | 0 | const char **nextTokPtr) { |
1399 | 0 | int level = 0; |
1400 | 0 | if (MINBPC(enc) > 1) { |
1401 | 0 | size_t n = end - ptr; |
1402 | 0 | if (n & (MINBPC(enc) - 1)) { |
1403 | 0 | n &= ~(MINBPC(enc) - 1); |
1404 | 0 | end = ptr + n; |
1405 | 0 | } |
1406 | 0 | } |
1407 | 0 | while (HAS_CHAR(enc, ptr, end)) { |
1408 | 0 | switch (BYTE_TYPE(enc, ptr)) { |
1409 | 0 | INVALID_CASES(ptr, nextTokPtr) |
1410 | 0 | case BT_LT: |
1411 | 0 | ptr += MINBPC(enc); |
1412 | 0 | REQUIRE_CHAR(enc, ptr, end); |
1413 | 0 | if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) { |
1414 | 0 | ptr += MINBPC(enc); |
1415 | 0 | REQUIRE_CHAR(enc, ptr, end); |
1416 | 0 | if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) { |
1417 | 0 | ++level; |
1418 | 0 | ptr += MINBPC(enc); |
1419 | 0 | } |
1420 | 0 | } |
1421 | 0 | break; |
1422 | 0 | case BT_RSQB: |
1423 | 0 | ptr += MINBPC(enc); |
1424 | 0 | REQUIRE_CHAR(enc, ptr, end); |
1425 | 0 | if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { |
1426 | 0 | ptr += MINBPC(enc); |
1427 | 0 | REQUIRE_CHAR(enc, ptr, end); |
1428 | 0 | if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
1429 | 0 | ptr += MINBPC(enc); |
1430 | 0 | if (level == 0) { |
1431 | 0 | *nextTokPtr = ptr; |
1432 | 0 | return XML_TOK_IGNORE_SECT; |
1433 | 0 | } |
1434 | 0 | --level; |
1435 | 0 | } |
1436 | 0 | } |
1437 | 0 | break; |
1438 | 0 | default: |
1439 | 0 | ptr += MINBPC(enc); |
1440 | 0 | break; |
1441 | 0 | } |
1442 | 0 | } |
1443 | 0 | return XML_TOK_PARTIAL; |
1444 | 0 | } Unexecuted instantiation: xmltok.c:normal_ignoreSectionTok Unexecuted instantiation: xmltok.c:little2_ignoreSectionTok Unexecuted instantiation: xmltok.c:big2_ignoreSectionTok |
1445 | | |
1446 | | # endif /* XML_DTD */ |
1447 | | |
1448 | | static int PTRCALL |
1449 | | PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, |
1450 | 140k | const char **badPtr) { |
1451 | 140k | ptr += MINBPC(enc); |
1452 | 140k | end -= MINBPC(enc); |
1453 | 15.1M | for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { |
1454 | 15.1M | switch (BYTE_TYPE(enc, ptr)) { |
1455 | 264k | case BT_DIGIT: |
1456 | 10.1M | case BT_HEX: |
1457 | 10.1M | case BT_MINUS: |
1458 | 10.1M | case BT_APOS: |
1459 | 10.1M | case BT_LPAR: |
1460 | 10.2M | case BT_RPAR: |
1461 | 10.2M | case BT_PLUS: |
1462 | 10.2M | case BT_COMMA: |
1463 | 10.2M | case BT_SOL: |
1464 | 10.2M | case BT_EQUALS: |
1465 | 10.3M | case BT_QUEST: |
1466 | 11.0M | case BT_CR: |
1467 | 11.0M | case BT_LF: |
1468 | 11.1M | case BT_SEMI: |
1469 | 11.2M | case BT_EXCL: |
1470 | 11.2M | case BT_AST: |
1471 | 11.2M | case BT_PERCNT: |
1472 | 11.3M | case BT_NUM: |
1473 | 11.3M | # ifdef XML_NS |
1474 | 11.3M | case BT_COLON: |
1475 | 11.3M | # endif |
1476 | 11.3M | break; |
1477 | 62.0k | case BT_S: |
1478 | 62.0k | if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) { |
1479 | 23 | *badPtr = ptr; |
1480 | 23 | return 0; |
1481 | 23 | } |
1482 | 62.0k | break; |
1483 | 112k | case BT_NAME: |
1484 | 3.21M | case BT_NMSTRT: |
1485 | 3.21M | if (! (BYTE_TO_ASCII(enc, ptr) & ~0x7f)) |
1486 | 3.21M | break; |
1487 | | /* fall through */ |
1488 | 514k | default: |
1489 | 514k | switch (BYTE_TO_ASCII(enc, ptr)) { |
1490 | 491k | case 0x24: /* $ */ |
1491 | 514k | case 0x40: /* @ */ |
1492 | 514k | break; |
1493 | 443 | default: |
1494 | 443 | *badPtr = ptr; |
1495 | 443 | return 0; |
1496 | 514k | } |
1497 | 514k | break; |
1498 | 15.1M | } |
1499 | 15.1M | } |
1500 | 140k | return 1; |
1501 | 140k | } xmltok.c:normal_isPublicId Line | Count | Source | 1450 | 49.2k | const char **badPtr) { | 1451 | 49.2k | ptr += MINBPC(enc); | 1452 | 49.2k | end -= MINBPC(enc); | 1453 | 14.6M | for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { | 1454 | 14.6M | switch (BYTE_TYPE(enc, ptr)) { | 1455 | 244k | case BT_DIGIT: | 1456 | 10.1M | case BT_HEX: | 1457 | 10.1M | case BT_MINUS: | 1458 | 10.1M | case BT_APOS: | 1459 | 10.1M | case BT_LPAR: | 1460 | 10.1M | case BT_RPAR: | 1461 | 10.1M | case BT_PLUS: | 1462 | 10.1M | case BT_COMMA: | 1463 | 10.1M | case BT_SOL: | 1464 | 10.1M | case BT_EQUALS: | 1465 | 10.1M | case BT_QUEST: | 1466 | 10.8M | case BT_CR: | 1467 | 10.9M | case BT_LF: | 1468 | 10.9M | case BT_SEMI: | 1469 | 10.9M | case BT_EXCL: | 1470 | 10.9M | case BT_AST: | 1471 | 10.9M | case BT_PERCNT: | 1472 | 11.0M | case BT_NUM: | 1473 | 11.0M | # ifdef XML_NS | 1474 | 11.0M | case BT_COLON: | 1475 | 11.0M | # endif | 1476 | 11.0M | break; | 1477 | 13.0k | case BT_S: | 1478 | 13.0k | if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) { | 1479 | 13 | *badPtr = ptr; | 1480 | 13 | return 0; | 1481 | 13 | } | 1482 | 13.0k | break; | 1483 | 86.6k | case BT_NAME: | 1484 | 3.16M | case BT_NMSTRT: | 1485 | 3.16M | if (! (BYTE_TO_ASCII(enc, ptr) & ~0x7f)) | 1486 | 3.16M | break; | 1487 | | /* fall through */ | 1488 | 482k | default: | 1489 | 482k | switch (BYTE_TO_ASCII(enc, ptr)) { | 1490 | 480k | case 0x24: /* $ */ | 1491 | 482k | case 0x40: /* @ */ | 1492 | 482k | break; | 1493 | 47 | default: | 1494 | 47 | *badPtr = ptr; | 1495 | 47 | return 0; | 1496 | 482k | } | 1497 | 482k | break; | 1498 | 14.6M | } | 1499 | 14.6M | } | 1500 | 49.1k | return 1; | 1501 | 49.2k | } |
xmltok.c:little2_isPublicId Line | Count | Source | 1450 | 6.09k | const char **badPtr) { | 1451 | 6.09k | ptr += MINBPC(enc); | 1452 | 6.09k | end -= MINBPC(enc); | 1453 | 211k | for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { | 1454 | 211k | switch (BYTE_TYPE(enc, ptr)) { | 1455 | 13.9k | case BT_DIGIT: | 1456 | 20.4k | case BT_HEX: | 1457 | 28.1k | case BT_MINUS: | 1458 | 29.2k | case BT_APOS: | 1459 | 30.8k | case BT_LPAR: | 1460 | 61.9k | case BT_RPAR: | 1461 | 69.3k | case BT_PLUS: | 1462 | 78.7k | case BT_COMMA: | 1463 | 79.6k | case BT_SOL: | 1464 | 80.6k | case BT_EQUALS: | 1465 | 81.5k | case BT_QUEST: | 1466 | 81.9k | case BT_CR: | 1467 | 82.6k | case BT_LF: | 1468 | 84.8k | case BT_SEMI: | 1469 | 98.2k | case BT_EXCL: | 1470 | 131k | case BT_AST: | 1471 | 135k | case BT_PERCNT: | 1472 | 148k | case BT_NUM: | 1473 | 148k | # ifdef XML_NS | 1474 | 155k | case BT_COLON: | 1475 | 155k | # endif | 1476 | 155k | break; | 1477 | 13.2k | case BT_S: | 1478 | 13.2k | if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) { | 1479 | 5 | *badPtr = ptr; | 1480 | 5 | return 0; | 1481 | 5 | } | 1482 | 13.2k | break; | 1483 | 13.2k | case BT_NAME: | 1484 | 27.5k | case BT_NMSTRT: | 1485 | 27.5k | if (! (BYTE_TO_ASCII(enc, ptr) & ~0x7f)) | 1486 | 27.4k | break; | 1487 | | /* fall through */ | 1488 | 15.2k | default: | 1489 | 15.2k | switch (BYTE_TO_ASCII(enc, ptr)) { | 1490 | 6.56k | case 0x24: /* $ */ | 1491 | 14.9k | case 0x40: /* @ */ | 1492 | 14.9k | break; | 1493 | 226 | default: | 1494 | 226 | *badPtr = ptr; | 1495 | 226 | return 0; | 1496 | 15.2k | } | 1497 | 14.9k | break; | 1498 | 211k | } | 1499 | 211k | } | 1500 | 5.86k | return 1; | 1501 | 6.09k | } |
Line | Count | Source | 1450 | 85.6k | const char **badPtr) { | 1451 | 85.6k | ptr += MINBPC(enc); | 1452 | 85.6k | end -= MINBPC(enc); | 1453 | 238k | for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { | 1454 | 238k | switch (BYTE_TYPE(enc, ptr)) { | 1455 | 5.89k | case BT_DIGIT: | 1456 | 14.6k | case BT_HEX: | 1457 | 22.0k | case BT_MINUS: | 1458 | 24.1k | case BT_APOS: | 1459 | 28.3k | case BT_LPAR: | 1460 | 56.3k | case BT_RPAR: | 1461 | 60.3k | case BT_PLUS: | 1462 | 72.5k | case BT_COMMA: | 1463 | 73.6k | case BT_SOL: | 1464 | 76.4k | case BT_EQUALS: | 1465 | 76.8k | case BT_QUEST: | 1466 | 78.0k | case BT_CR: | 1467 | 79.8k | case BT_LF: | 1468 | 117k | case BT_SEMI: | 1469 | 127k | case BT_EXCL: | 1470 | 160k | case BT_AST: | 1471 | 162k | case BT_PERCNT: | 1472 | 165k | case BT_NUM: | 1473 | 165k | # ifdef XML_NS | 1474 | 168k | case BT_COLON: | 1475 | 168k | # endif | 1476 | 168k | break; | 1477 | 35.7k | case BT_S: | 1478 | 35.7k | if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) { | 1479 | 5 | *badPtr = ptr; | 1480 | 5 | return 0; | 1481 | 5 | } | 1482 | 35.7k | break; | 1483 | 35.7k | case BT_NAME: | 1484 | 18.4k | case BT_NMSTRT: | 1485 | 18.4k | if (! (BYTE_TO_ASCII(enc, ptr) & ~0x7f)) | 1486 | 18.4k | break; | 1487 | | /* fall through */ | 1488 | 16.5k | default: | 1489 | 16.5k | switch (BYTE_TO_ASCII(enc, ptr)) { | 1490 | 4.28k | case 0x24: /* $ */ | 1491 | 16.3k | case 0x40: /* @ */ | 1492 | 16.3k | break; | 1493 | 170 | default: | 1494 | 170 | *badPtr = ptr; | 1495 | 170 | return 0; | 1496 | 16.5k | } | 1497 | 16.3k | break; | 1498 | 238k | } | 1499 | 238k | } | 1500 | 85.4k | return 1; | 1501 | 85.6k | } |
|
1502 | | |
1503 | | /* This must only be called for a well-formed start-tag or empty |
1504 | | element tag. Returns the number of attributes. Pointers to the |
1505 | | first attsMax attributes are stored in atts. |
1506 | | */ |
1507 | | |
1508 | | static int PTRCALL |
1509 | | PREFIX(getAtts)(const ENCODING *enc, const char *ptr, int attsMax, |
1510 | 21.9M | ATTRIBUTE *atts) { |
1511 | 21.9M | enum { other, inName, inValue } state = inName; |
1512 | 21.9M | int nAtts = 0; |
1513 | 21.9M | int open = 0; /* defined when state == inValue; |
1514 | | initialization just to shut up compilers */ |
1515 | | |
1516 | 237M | for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) { |
1517 | 237M | switch (BYTE_TYPE(enc, ptr)) { |
1518 | 0 | # define START_NAME \ |
1519 | 177M | if (state == other) { \ |
1520 | 4.84M | if (nAtts < attsMax) { \ |
1521 | 3.05M | atts[nAtts].name = ptr; \ |
1522 | 3.05M | atts[nAtts].normalized = 1; \ |
1523 | 3.05M | } \ |
1524 | 4.84M | state = inName; \ |
1525 | 4.84M | } |
1526 | 0 | # define LEAD_CASE(n) \ |
1527 | 10.5M | case BT_LEAD##n: /* NOTE: The encoding has already been validated. */ \ |
1528 | 10.5M | START_NAME ptr += (n - MINBPC(enc)); \ |
1529 | 10.5M | break; |
1530 | 2.41M | LEAD_CASE(2) |
1531 | 7.72M | LEAD_CASE(3) |
1532 | 397k | LEAD_CASE(4) |
1533 | 0 | # undef LEAD_CASE |
1534 | 28.7M | case BT_NONASCII: |
1535 | 110M | case BT_NMSTRT: |
1536 | 167M | case BT_HEX: |
1537 | 167M | START_NAME |
1538 | 167M | break; |
1539 | 0 | # undef START_NAME |
1540 | 2.97M | case BT_QUOT: |
1541 | 2.97M | if (state != inValue) { |
1542 | 1.47M | if (nAtts < attsMax) |
1543 | 1.33M | atts[nAtts].valuePtr = ptr + MINBPC(enc); |
1544 | 1.47M | state = inValue; |
1545 | 1.47M | open = BT_QUOT; |
1546 | 1.50M | } else if (open == BT_QUOT) { |
1547 | 1.47M | state = other; |
1548 | 1.47M | if (nAtts < attsMax) |
1549 | 1.33M | atts[nAtts].valueEnd = ptr; |
1550 | 1.47M | nAtts++; |
1551 | 1.47M | } |
1552 | 2.97M | break; |
1553 | 6.78M | case BT_APOS: |
1554 | 6.78M | if (state != inValue) { |
1555 | 3.37M | if (nAtts < attsMax) |
1556 | 1.72M | atts[nAtts].valuePtr = ptr + MINBPC(enc); |
1557 | 3.37M | state = inValue; |
1558 | 3.37M | open = BT_APOS; |
1559 | 3.41M | } else if (open == BT_APOS) { |
1560 | 3.37M | state = other; |
1561 | 3.37M | if (nAtts < attsMax) |
1562 | 1.72M | atts[nAtts].valueEnd = ptr; |
1563 | 3.37M | nAtts++; |
1564 | 3.37M | } |
1565 | 6.78M | break; |
1566 | 194k | case BT_AMP: |
1567 | 194k | if (nAtts < attsMax) |
1568 | 191k | atts[nAtts].normalized = 0; |
1569 | 194k | break; |
1570 | 7.68M | case BT_S: |
1571 | 7.68M | if (state == inName) |
1572 | 3.45M | state = other; |
1573 | 4.23M | else if (state == inValue && nAtts < attsMax && atts[nAtts].normalized |
1574 | 286k | && (ptr == atts[nAtts].valuePtr |
1575 | 230k | || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE |
1576 | 206k | || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE |
1577 | 100k | || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open)) |
1578 | 275k | atts[nAtts].normalized = 0; |
1579 | 7.68M | break; |
1580 | 5.43M | case BT_CR: |
1581 | 6.26M | case BT_LF: |
1582 | | /* This case ensures that the first attribute name is counted |
1583 | | Apart from that we could just change state on the quote. */ |
1584 | 6.26M | if (state == inName) |
1585 | 222k | state = other; |
1586 | 6.04M | else if (state == inValue && nAtts < attsMax) |
1587 | 3.81M | atts[nAtts].normalized = 0; |
1588 | 6.26M | break; |
1589 | 19.6M | case BT_GT: |
1590 | 22.3M | case BT_SOL: |
1591 | 22.3M | if (state != inValue) |
1592 | 21.9M | return nAtts; |
1593 | 455k | break; |
1594 | 13.9M | default: |
1595 | 13.9M | break; |
1596 | 237M | } |
1597 | 237M | } |
1598 | | /* not reached */ |
1599 | 21.9M | } Line | Count | Source | 1510 | 21.5M | ATTRIBUTE *atts) { | 1511 | 21.5M | enum { other, inName, inValue } state = inName; | 1512 | 21.5M | int nAtts = 0; | 1513 | 21.5M | int open = 0; /* defined when state == inValue; | 1514 | | initialization just to shut up compilers */ | 1515 | | | 1516 | 200M | for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) { | 1517 | 200M | switch (BYTE_TYPE(enc, ptr)) { | 1518 | 0 | # define START_NAME \ | 1519 | 0 | if (state == other) { \ | 1520 | 0 | if (nAtts < attsMax) { \ | 1521 | 0 | atts[nAtts].name = ptr; \ | 1522 | 0 | atts[nAtts].normalized = 1; \ | 1523 | 0 | } \ | 1524 | 0 | state = inName; \ | 1525 | 0 | } | 1526 | 0 | # define LEAD_CASE(n) \ | 1527 | 0 | case BT_LEAD##n: /* NOTE: The encoding has already been validated. */ \ | 1528 | 0 | START_NAME ptr += (n - MINBPC(enc)); \ | 1529 | 0 | break; | 1530 | 2.41M | LEAD_CASE(2) | 1531 | 7.72M | LEAD_CASE(3) | 1532 | 10.4k | LEAD_CASE(4) | 1533 | 0 | # undef LEAD_CASE | 1534 | 0 | case BT_NONASCII: | 1535 | 80.8M | case BT_NMSTRT: | 1536 | 136M | case BT_HEX: | 1537 | 136M | START_NAME | 1538 | 136M | break; | 1539 | 0 | # undef START_NAME | 1540 | 2.84M | case BT_QUOT: | 1541 | 2.84M | if (state != inValue) { | 1542 | 1.41M | if (nAtts < attsMax) | 1543 | 1.29M | atts[nAtts].valuePtr = ptr + MINBPC(enc); | 1544 | 1.41M | state = inValue; | 1545 | 1.41M | open = BT_QUOT; | 1546 | 1.42M | } else if (open == BT_QUOT) { | 1547 | 1.41M | state = other; | 1548 | 1.41M | if (nAtts < attsMax) | 1549 | 1.29M | atts[nAtts].valueEnd = ptr; | 1550 | 1.41M | nAtts++; | 1551 | 1.41M | } | 1552 | 2.84M | break; | 1553 | 5.59M | case BT_APOS: | 1554 | 5.59M | if (state != inValue) { | 1555 | 2.79M | if (nAtts < attsMax) | 1556 | 1.42M | atts[nAtts].valuePtr = ptr + MINBPC(enc); | 1557 | 2.79M | state = inValue; | 1558 | 2.79M | open = BT_APOS; | 1559 | 2.80M | } else if (open == BT_APOS) { | 1560 | 2.79M | state = other; | 1561 | 2.79M | if (nAtts < attsMax) | 1562 | 1.42M | atts[nAtts].valueEnd = ptr; | 1563 | 2.79M | nAtts++; | 1564 | 2.79M | } | 1565 | 5.59M | break; | 1566 | 80.4k | case BT_AMP: | 1567 | 80.4k | if (nAtts < attsMax) | 1568 | 77.7k | atts[nAtts].normalized = 0; | 1569 | 80.4k | break; | 1570 | 6.01M | case BT_S: | 1571 | 6.01M | if (state == inName) | 1572 | 3.44M | state = other; | 1573 | 2.57M | else if (state == inValue && nAtts < attsMax && atts[nAtts].normalized | 1574 | 26.5k | && (ptr == atts[nAtts].valuePtr | 1575 | 18.3k | || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE | 1576 | 6.24k | || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE | 1577 | 5.40k | || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open)) | 1578 | 24.8k | atts[nAtts].normalized = 0; | 1579 | 6.01M | break; | 1580 | 4.61M | case BT_CR: | 1581 | 4.82M | case BT_LF: | 1582 | | /* This case ensures that the first attribute name is counted | 1583 | | Apart from that we could just change state on the quote. */ | 1584 | 4.82M | if (state == inName) | 1585 | 162k | state = other; | 1586 | 4.66M | else if (state == inValue && nAtts < attsMax) | 1587 | 2.63M | atts[nAtts].normalized = 0; | 1588 | 4.82M | break; | 1589 | 18.9M | case BT_GT: | 1590 | 21.6M | case BT_SOL: | 1591 | 21.6M | if (state != inValue) | 1592 | 21.5M | return nAtts; | 1593 | 112k | break; | 1594 | 12.5M | default: | 1595 | 12.5M | break; | 1596 | 200M | } | 1597 | 200M | } | 1598 | | /* not reached */ | 1599 | 21.5M | } |
Line | Count | Source | 1510 | 219k | ATTRIBUTE *atts) { | 1511 | 219k | enum { other, inName, inValue } state = inName; | 1512 | 219k | int nAtts = 0; | 1513 | 219k | int open = 0; /* defined when state == inValue; | 1514 | | initialization just to shut up compilers */ | 1515 | | | 1516 | 17.0M | for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) { | 1517 | 17.0M | switch (BYTE_TYPE(enc, ptr)) { | 1518 | 0 | # define START_NAME \ | 1519 | 0 | if (state == other) { \ | 1520 | 0 | if (nAtts < attsMax) { \ | 1521 | 0 | atts[nAtts].name = ptr; \ | 1522 | 0 | atts[nAtts].normalized = 1; \ | 1523 | 0 | } \ | 1524 | 0 | state = inName; \ | 1525 | 0 | } | 1526 | 0 | # define LEAD_CASE(n) \ | 1527 | 0 | case BT_LEAD##n: /* NOTE: The encoding has already been validated. */ \ | 1528 | 0 | START_NAME ptr += (n - MINBPC(enc)); \ | 1529 | 0 | break; | 1530 | 0 | LEAD_CASE(2) | 1531 | 0 | LEAD_CASE(3) | 1532 | 243k | LEAD_CASE(4) | 1533 | 0 | # undef LEAD_CASE | 1534 | 10.9M | case BT_NONASCII: | 1535 | 11.8M | case BT_NMSTRT: | 1536 | 11.9M | case BT_HEX: | 1537 | 11.9M | START_NAME | 1538 | 11.9M | break; | 1539 | 0 | # undef START_NAME | 1540 | 30.8k | case BT_QUOT: | 1541 | 30.8k | if (state != inValue) { | 1542 | 11.8k | if (nAtts < attsMax) | 1543 | 10.3k | atts[nAtts].valuePtr = ptr + MINBPC(enc); | 1544 | 11.8k | state = inValue; | 1545 | 11.8k | open = BT_QUOT; | 1546 | 19.0k | } else if (open == BT_QUOT) { | 1547 | 11.8k | state = other; | 1548 | 11.8k | if (nAtts < attsMax) | 1549 | 10.3k | atts[nAtts].valueEnd = ptr; | 1550 | 11.8k | nAtts++; | 1551 | 11.8k | } | 1552 | 30.8k | break; | 1553 | 1.15M | case BT_APOS: | 1554 | 1.15M | if (state != inValue) { | 1555 | 569k | if (nAtts < attsMax) | 1556 | 287k | atts[nAtts].valuePtr = ptr + MINBPC(enc); | 1557 | 569k | state = inValue; | 1558 | 569k | open = BT_APOS; | 1559 | 586k | } else if (open == BT_APOS) { | 1560 | 569k | state = other; | 1561 | 569k | if (nAtts < attsMax) | 1562 | 287k | atts[nAtts].valueEnd = ptr; | 1563 | 569k | nAtts++; | 1564 | 569k | } | 1565 | 1.15M | break; | 1566 | 17.1k | case BT_AMP: | 1567 | 17.1k | if (nAtts < attsMax) | 1568 | 16.8k | atts[nAtts].normalized = 0; | 1569 | 17.1k | break; | 1570 | 1.61M | case BT_S: | 1571 | 1.61M | if (state == inName) | 1572 | 4.12k | state = other; | 1573 | 1.61M | else if (state == inValue && nAtts < attsMax && atts[nAtts].normalized | 1574 | 252k | && (ptr == atts[nAtts].valuePtr | 1575 | 206k | || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE | 1576 | 195k | || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE | 1577 | 91.1k | || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open)) | 1578 | 245k | atts[nAtts].normalized = 0; | 1579 | 1.61M | break; | 1580 | 302k | case BT_CR: | 1581 | 359k | case BT_LF: | 1582 | | /* This case ensures that the first attribute name is counted | 1583 | | Apart from that we could just change state on the quote. */ | 1584 | 359k | if (state == inName) | 1585 | 9.85k | state = other; | 1586 | 349k | else if (state == inValue && nAtts < attsMax) | 1587 | 211k | atts[nAtts].normalized = 0; | 1588 | 359k | break; | 1589 | 473k | case BT_GT: | 1590 | 532k | case BT_SOL: | 1591 | 532k | if (state != inValue) | 1592 | 219k | return nAtts; | 1593 | 312k | break; | 1594 | 1.07M | default: | 1595 | 1.07M | break; | 1596 | 17.0M | } | 1597 | 17.0M | } | 1598 | | /* not reached */ | 1599 | 219k | } |
Line | Count | Source | 1510 | 147k | ATTRIBUTE *atts) { | 1511 | 147k | enum { other, inName, inValue } state = inName; | 1512 | 147k | int nAtts = 0; | 1513 | 147k | int open = 0; /* defined when state == inValue; | 1514 | | initialization just to shut up compilers */ | 1515 | | | 1516 | 20.6M | for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) { | 1517 | 20.6M | switch (BYTE_TYPE(enc, ptr)) { | 1518 | 0 | # define START_NAME \ | 1519 | 0 | if (state == other) { \ | 1520 | 0 | if (nAtts < attsMax) { \ | 1521 | 0 | atts[nAtts].name = ptr; \ | 1522 | 0 | atts[nAtts].normalized = 1; \ | 1523 | 0 | } \ | 1524 | 0 | state = inName; \ | 1525 | 0 | } | 1526 | 0 | # define LEAD_CASE(n) \ | 1527 | 0 | case BT_LEAD##n: /* NOTE: The encoding has already been validated. */ \ | 1528 | 0 | START_NAME ptr += (n - MINBPC(enc)); \ | 1529 | 0 | break; | 1530 | 0 | LEAD_CASE(2) | 1531 | 0 | LEAD_CASE(3) | 1532 | 143k | LEAD_CASE(4) | 1533 | 0 | # undef LEAD_CASE | 1534 | 17.8M | case BT_NONASCII: | 1535 | 18.2M | case BT_NMSTRT: | 1536 | 18.6M | case BT_HEX: | 1537 | 18.6M | START_NAME | 1538 | 18.6M | break; | 1539 | 0 | # undef START_NAME | 1540 | 105k | case BT_QUOT: | 1541 | 105k | if (state != inValue) { | 1542 | 50.6k | if (nAtts < attsMax) | 1543 | 28.8k | atts[nAtts].valuePtr = ptr + MINBPC(enc); | 1544 | 50.6k | state = inValue; | 1545 | 50.6k | open = BT_QUOT; | 1546 | 54.6k | } else if (open == BT_QUOT) { | 1547 | 50.6k | state = other; | 1548 | 50.6k | if (nAtts < attsMax) | 1549 | 28.8k | atts[nAtts].valueEnd = ptr; | 1550 | 50.6k | nAtts++; | 1551 | 50.6k | } | 1552 | 105k | break; | 1553 | 38.3k | case BT_APOS: | 1554 | 38.3k | if (state != inValue) { | 1555 | 10.8k | if (nAtts < attsMax) | 1556 | 8.17k | atts[nAtts].valuePtr = ptr + MINBPC(enc); | 1557 | 10.8k | state = inValue; | 1558 | 10.8k | open = BT_APOS; | 1559 | 27.5k | } else if (open == BT_APOS) { | 1560 | 10.8k | state = other; | 1561 | 10.8k | if (nAtts < attsMax) | 1562 | 8.17k | atts[nAtts].valueEnd = ptr; | 1563 | 10.8k | nAtts++; | 1564 | 10.8k | } | 1565 | 38.3k | break; | 1566 | 97.1k | case BT_AMP: | 1567 | 97.1k | if (nAtts < attsMax) | 1568 | 96.7k | atts[nAtts].normalized = 0; | 1569 | 97.1k | break; | 1570 | 48.0k | case BT_S: | 1571 | 48.0k | if (state == inName) | 1572 | 4.13k | state = other; | 1573 | 43.9k | else if (state == inValue && nAtts < attsMax && atts[nAtts].normalized | 1574 | 7.53k | && (ptr == atts[nAtts].valuePtr | 1575 | 5.43k | || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE | 1576 | 4.72k | || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE | 1577 | 3.93k | || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open)) | 1578 | 4.63k | atts[nAtts].normalized = 0; | 1579 | 48.0k | break; | 1580 | 512k | case BT_CR: | 1581 | 1.07M | case BT_LF: | 1582 | | /* This case ensures that the first attribute name is counted | 1583 | | Apart from that we could just change state on the quote. */ | 1584 | 1.07M | if (state == inName) | 1585 | 50.2k | state = other; | 1586 | 1.02M | else if (state == inValue && nAtts < attsMax) | 1587 | 973k | atts[nAtts].normalized = 0; | 1588 | 1.07M | break; | 1589 | 158k | case BT_GT: | 1590 | 177k | case BT_SOL: | 1591 | 177k | if (state != inValue) | 1592 | 147k | return nAtts; | 1593 | 30.9k | break; | 1594 | 299k | default: | 1595 | 299k | break; | 1596 | 20.6M | } | 1597 | 20.6M | } | 1598 | | /* not reached */ | 1599 | 147k | } |
|
1600 | | |
1601 | | static int PTRFASTCALL |
1602 | 536k | PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) { |
1603 | 536k | int result = 0; |
1604 | | /* skip &# */ |
1605 | 536k | UNUSED_P(enc); |
1606 | 536k | ptr += 2 * MINBPC(enc); |
1607 | 536k | if (CHAR_MATCHES(enc, ptr, ASCII_x)) { |
1608 | 1.70M | for (ptr += MINBPC(enc); ! CHAR_MATCHES(enc, ptr, ASCII_SEMI); |
1609 | 1.18M | ptr += MINBPC(enc)) { |
1610 | 1.18M | int c = BYTE_TO_ASCII(enc, ptr); |
1611 | 1.18M | switch (c) { |
1612 | 6.89k | case ASCII_0: |
1613 | 12.4k | case ASCII_1: |
1614 | 17.0k | case ASCII_2: |
1615 | 22.8k | case ASCII_3: |
1616 | 35.5k | case ASCII_4: |
1617 | 42.5k | case ASCII_5: |
1618 | 48.3k | case ASCII_6: |
1619 | 55.4k | case ASCII_7: |
1620 | 71.3k | case ASCII_8: |
1621 | 73.5k | case ASCII_9: |
1622 | 73.5k | result <<= 4; |
1623 | 73.5k | result |= (c - ASCII_0); |
1624 | 73.5k | break; |
1625 | 463k | case ASCII_A: |
1626 | 468k | case ASCII_B: |
1627 | 471k | case ASCII_C: |
1628 | 472k | case ASCII_D: |
1629 | 486k | case ASCII_E: |
1630 | 640k | case ASCII_F: |
1631 | 640k | result <<= 4; |
1632 | 640k | result += 10 + (c - ASCII_A); |
1633 | 640k | break; |
1634 | 144k | case ASCII_a: |
1635 | 290k | case ASCII_b: |
1636 | 424k | case ASCII_c: |
1637 | 436k | case ASCII_d: |
1638 | 459k | case ASCII_e: |
1639 | 472k | case ASCII_f: |
1640 | 472k | result <<= 4; |
1641 | 472k | result += 10 + (c - ASCII_a); |
1642 | 472k | break; |
1643 | 1.18M | } |
1644 | 1.18M | if (result >= 0x110000) |
1645 | 148 | return -1; |
1646 | 1.18M | } |
1647 | 518k | } else { |
1648 | 58.0k | for (; ! CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { |
1649 | 40.1k | int c = BYTE_TO_ASCII(enc, ptr); |
1650 | 40.1k | result *= 10; |
1651 | 40.1k | result += (c - ASCII_0); |
1652 | 40.1k | if (result >= 0x110000) |
1653 | 23 | return -1; |
1654 | 40.1k | } |
1655 | 17.9k | } |
1656 | 536k | return checkCharRefNumber(result); |
1657 | 536k | } xmltok.c:normal_charRefNumber Line | Count | Source | 1602 | 358k | PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) { | 1603 | 358k | int result = 0; | 1604 | | /* skip &# */ | 1605 | 358k | UNUSED_P(enc); | 1606 | 358k | ptr += 2 * MINBPC(enc); | 1607 | 358k | if (CHAR_MATCHES(enc, ptr, ASCII_x)) { | 1608 | 752k | for (ptr += MINBPC(enc); ! CHAR_MATCHES(enc, ptr, ASCII_SEMI); | 1609 | 402k | ptr += MINBPC(enc)) { | 1610 | 402k | int c = BYTE_TO_ASCII(enc, ptr); | 1611 | 402k | switch (c) { | 1612 | 3.45k | case ASCII_0: | 1613 | 7.02k | case ASCII_1: | 1614 | 9.22k | case ASCII_2: | 1615 | 9.69k | case ASCII_3: | 1616 | 12.1k | case ASCII_4: | 1617 | 12.8k | case ASCII_5: | 1618 | 13.1k | case ASCII_6: | 1619 | 16.3k | case ASCII_7: | 1620 | 27.5k | case ASCII_8: | 1621 | 28.1k | case ASCII_9: | 1622 | 28.1k | result <<= 4; | 1623 | 28.1k | result |= (c - ASCII_0); | 1624 | 28.1k | break; | 1625 | 330k | case ASCII_A: | 1626 | 330k | case ASCII_B: | 1627 | 331k | case ASCII_C: | 1628 | 331k | case ASCII_D: | 1629 | 343k | case ASCII_E: | 1630 | 358k | case ASCII_F: | 1631 | 358k | result <<= 4; | 1632 | 358k | result += 10 + (c - ASCII_A); | 1633 | 358k | break; | 1634 | 718 | case ASCII_a: | 1635 | 2.36k | case ASCII_b: | 1636 | 3.12k | case ASCII_c: | 1637 | 3.53k | case ASCII_d: | 1638 | 15.1k | case ASCII_e: | 1639 | 15.8k | case ASCII_f: | 1640 | 15.8k | result <<= 4; | 1641 | 15.8k | result += 10 + (c - ASCII_a); | 1642 | 15.8k | break; | 1643 | 402k | } | 1644 | 402k | if (result >= 0x110000) | 1645 | 66 | return -1; | 1646 | 402k | } | 1647 | 350k | } else { | 1648 | 22.4k | for (; ! CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { | 1649 | 14.2k | int c = BYTE_TO_ASCII(enc, ptr); | 1650 | 14.2k | result *= 10; | 1651 | 14.2k | result += (c - ASCII_0); | 1652 | 14.2k | if (result >= 0x110000) | 1653 | 12 | return -1; | 1654 | 14.2k | } | 1655 | 8.17k | } | 1656 | 358k | return checkCharRefNumber(result); | 1657 | 358k | } |
xmltok.c:little2_charRefNumber Line | Count | Source | 1602 | 46.3k | PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) { | 1603 | 46.3k | int result = 0; | 1604 | | /* skip &# */ | 1605 | 46.3k | UNUSED_P(enc); | 1606 | 46.3k | ptr += 2 * MINBPC(enc); | 1607 | 46.3k | if (CHAR_MATCHES(enc, ptr, ASCII_x)) { | 1608 | 186k | for (ptr += MINBPC(enc); ! CHAR_MATCHES(enc, ptr, ASCII_SEMI); | 1609 | 149k | ptr += MINBPC(enc)) { | 1610 | 149k | int c = BYTE_TO_ASCII(enc, ptr); | 1611 | 149k | switch (c) { | 1612 | 1.91k | case ASCII_0: | 1613 | 2.74k | case ASCII_1: | 1614 | 3.72k | case ASCII_2: | 1615 | 5.98k | case ASCII_3: | 1616 | 9.32k | case ASCII_4: | 1617 | 12.5k | case ASCII_5: | 1618 | 16.5k | case ASCII_6: | 1619 | 19.0k | case ASCII_7: | 1620 | 20.1k | case ASCII_8: | 1621 | 21.2k | case ASCII_9: | 1622 | 21.2k | result <<= 4; | 1623 | 21.2k | result |= (c - ASCII_0); | 1624 | 21.2k | break; | 1625 | 20.4k | case ASCII_A: | 1626 | 22.1k | case ASCII_B: | 1627 | 22.5k | case ASCII_C: | 1628 | 23.0k | case ASCII_D: | 1629 | 24.1k | case ASCII_E: | 1630 | 47.2k | case ASCII_F: | 1631 | 47.2k | result <<= 4; | 1632 | 47.2k | result += 10 + (c - ASCII_A); | 1633 | 47.2k | break; | 1634 | 22.3k | case ASCII_a: | 1635 | 51.4k | case ASCII_b: | 1636 | 71.8k | case ASCII_c: | 1637 | 73.5k | case ASCII_d: | 1638 | 75.4k | case ASCII_e: | 1639 | 80.9k | case ASCII_f: | 1640 | 80.9k | result <<= 4; | 1641 | 80.9k | result += 10 + (c - ASCII_a); | 1642 | 80.9k | break; | 1643 | 149k | } | 1644 | 149k | if (result >= 0x110000) | 1645 | 43 | return -1; | 1646 | 149k | } | 1647 | 36.7k | } else { | 1648 | 34.9k | for (; ! CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { | 1649 | 25.3k | int c = BYTE_TO_ASCII(enc, ptr); | 1650 | 25.3k | result *= 10; | 1651 | 25.3k | result += (c - ASCII_0); | 1652 | 25.3k | if (result >= 0x110000) | 1653 | 5 | return -1; | 1654 | 25.3k | } | 1655 | 9.60k | } | 1656 | 46.2k | return checkCharRefNumber(result); | 1657 | 46.3k | } |
xmltok.c:big2_charRefNumber Line | Count | Source | 1602 | 132k | PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) { | 1603 | 132k | int result = 0; | 1604 | | /* skip &# */ | 1605 | 132k | UNUSED_P(enc); | 1606 | 132k | ptr += 2 * MINBPC(enc); | 1607 | 132k | if (CHAR_MATCHES(enc, ptr, ASCII_x)) { | 1608 | 766k | for (ptr += MINBPC(enc); ! CHAR_MATCHES(enc, ptr, ASCII_SEMI); | 1609 | 634k | ptr += MINBPC(enc)) { | 1610 | 634k | int c = BYTE_TO_ASCII(enc, ptr); | 1611 | 634k | switch (c) { | 1612 | 1.52k | case ASCII_0: | 1613 | 2.69k | case ASCII_1: | 1614 | 4.07k | case ASCII_2: | 1615 | 7.17k | case ASCII_3: | 1616 | 14.0k | case ASCII_4: | 1617 | 17.1k | case ASCII_5: | 1618 | 18.7k | case ASCII_6: | 1619 | 20.0k | case ASCII_7: | 1620 | 23.6k | case ASCII_8: | 1621 | 24.1k | case ASCII_9: | 1622 | 24.1k | result <<= 4; | 1623 | 24.1k | result |= (c - ASCII_0); | 1624 | 24.1k | break; | 1625 | 113k | case ASCII_A: | 1626 | 115k | case ASCII_B: | 1627 | 117k | case ASCII_C: | 1628 | 117k | case ASCII_D: | 1629 | 118k | case ASCII_E: | 1630 | 234k | case ASCII_F: | 1631 | 234k | result <<= 4; | 1632 | 234k | result += 10 + (c - ASCII_A); | 1633 | 234k | break; | 1634 | 121k | case ASCII_a: | 1635 | 236k | case ASCII_b: | 1636 | 349k | case ASCII_c: | 1637 | 359k | case ASCII_d: | 1638 | 368k | case ASCII_e: | 1639 | 375k | case ASCII_f: | 1640 | 375k | result <<= 4; | 1641 | 375k | result += 10 + (c - ASCII_a); | 1642 | 375k | break; | 1643 | 634k | } | 1644 | 634k | if (result >= 0x110000) | 1645 | 39 | return -1; | 1646 | 634k | } | 1647 | 132k | } else { | 1648 | 727 | for (; ! CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { | 1649 | 555 | int c = BYTE_TO_ASCII(enc, ptr); | 1650 | 555 | result *= 10; | 1651 | 555 | result += (c - ASCII_0); | 1652 | 555 | if (result >= 0x110000) | 1653 | 6 | return -1; | 1654 | 555 | } | 1655 | 178 | } | 1656 | 132k | return checkCharRefNumber(result); | 1657 | 132k | } |
|
1658 | | |
1659 | | static int PTRCALL |
1660 | | PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, |
1661 | 6.78M | const char *end) { |
1662 | 6.78M | UNUSED_P(enc); |
1663 | 6.78M | switch ((end - ptr) / MINBPC(enc)) { |
1664 | 160k | case 2: |
1665 | 160k | if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) { |
1666 | 133k | switch (BYTE_TO_ASCII(enc, ptr)) { |
1667 | 85.3k | case ASCII_l: |
1668 | 85.3k | return ASCII_LT; |
1669 | 46.5k | case ASCII_g: |
1670 | 46.5k | return ASCII_GT; |
1671 | 133k | } |
1672 | 133k | } |
1673 | 28.7k | break; |
1674 | 28.7k | case 3: |
1675 | 25.2k | if (CHAR_MATCHES(enc, ptr, ASCII_a)) { |
1676 | 20.7k | ptr += MINBPC(enc); |
1677 | 20.7k | if (CHAR_MATCHES(enc, ptr, ASCII_m)) { |
1678 | 11.4k | ptr += MINBPC(enc); |
1679 | 11.4k | if (CHAR_MATCHES(enc, ptr, ASCII_p)) |
1680 | 1.92k | return ASCII_AMP; |
1681 | 11.4k | } |
1682 | 20.7k | } |
1683 | 23.3k | break; |
1684 | 30.6k | case 4: |
1685 | 30.6k | switch (BYTE_TO_ASCII(enc, ptr)) { |
1686 | 10.0k | case ASCII_q: |
1687 | 10.0k | ptr += MINBPC(enc); |
1688 | 10.0k | if (CHAR_MATCHES(enc, ptr, ASCII_u)) { |
1689 | 7.54k | ptr += MINBPC(enc); |
1690 | 7.54k | if (CHAR_MATCHES(enc, ptr, ASCII_o)) { |
1691 | 4.39k | ptr += MINBPC(enc); |
1692 | 4.39k | if (CHAR_MATCHES(enc, ptr, ASCII_t)) |
1693 | 2.16k | return ASCII_QUOT; |
1694 | 4.39k | } |
1695 | 7.54k | } |
1696 | 7.91k | break; |
1697 | 11.3k | case ASCII_a: |
1698 | 11.3k | ptr += MINBPC(enc); |
1699 | 11.3k | if (CHAR_MATCHES(enc, ptr, ASCII_p)) { |
1700 | 8.10k | ptr += MINBPC(enc); |
1701 | 8.10k | if (CHAR_MATCHES(enc, ptr, ASCII_o)) { |
1702 | 4.67k | ptr += MINBPC(enc); |
1703 | 4.67k | if (CHAR_MATCHES(enc, ptr, ASCII_s)) |
1704 | 1.50k | return ASCII_APOS; |
1705 | 4.67k | } |
1706 | 8.10k | } |
1707 | 9.87k | break; |
1708 | 30.6k | } |
1709 | 6.78M | } |
1710 | 6.64M | return 0; |
1711 | 6.78M | } xmltok.c:normal_predefinedEntityName Line | Count | Source | 1661 | 6.62M | const char *end) { | 1662 | 6.62M | UNUSED_P(enc); | 1663 | 6.62M | switch ((end - ptr) / MINBPC(enc)) { | 1664 | 152k | case 2: | 1665 | 152k | if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) { | 1666 | 130k | switch (BYTE_TO_ASCII(enc, ptr)) { | 1667 | 84.4k | case ASCII_l: | 1668 | 84.4k | return ASCII_LT; | 1669 | 45.9k | case ASCII_g: | 1670 | 45.9k | return ASCII_GT; | 1671 | 130k | } | 1672 | 130k | } | 1673 | 22.5k | break; | 1674 | 22.5k | case 3: | 1675 | 16.8k | if (CHAR_MATCHES(enc, ptr, ASCII_a)) { | 1676 | 14.3k | ptr += MINBPC(enc); | 1677 | 14.3k | if (CHAR_MATCHES(enc, ptr, ASCII_m)) { | 1678 | 9.00k | ptr += MINBPC(enc); | 1679 | 9.00k | if (CHAR_MATCHES(enc, ptr, ASCII_p)) | 1680 | 966 | return ASCII_AMP; | 1681 | 9.00k | } | 1682 | 14.3k | } | 1683 | 15.8k | break; | 1684 | 15.8k | case 4: | 1685 | 12.0k | switch (BYTE_TO_ASCII(enc, ptr)) { | 1686 | 2.43k | case ASCII_q: | 1687 | 2.43k | ptr += MINBPC(enc); | 1688 | 2.43k | if (CHAR_MATCHES(enc, ptr, ASCII_u)) { | 1689 | 1.89k | ptr += MINBPC(enc); | 1690 | 1.89k | if (CHAR_MATCHES(enc, ptr, ASCII_o)) { | 1691 | 1.42k | ptr += MINBPC(enc); | 1692 | 1.42k | if (CHAR_MATCHES(enc, ptr, ASCII_t)) | 1693 | 645 | return ASCII_QUOT; | 1694 | 1.42k | } | 1695 | 1.89k | } | 1696 | 1.78k | break; | 1697 | 3.19k | case ASCII_a: | 1698 | 3.19k | ptr += MINBPC(enc); | 1699 | 3.19k | if (CHAR_MATCHES(enc, ptr, ASCII_p)) { | 1700 | 1.33k | ptr += MINBPC(enc); | 1701 | 1.33k | if (CHAR_MATCHES(enc, ptr, ASCII_o)) { | 1702 | 1.11k | ptr += MINBPC(enc); | 1703 | 1.11k | if (CHAR_MATCHES(enc, ptr, ASCII_s)) | 1704 | 863 | return ASCII_APOS; | 1705 | 1.11k | } | 1706 | 1.33k | } | 1707 | 2.33k | break; | 1708 | 12.0k | } | 1709 | 6.62M | } | 1710 | 6.49M | return 0; | 1711 | 6.62M | } |
xmltok.c:little2_predefinedEntityName Line | Count | Source | 1661 | 34.7k | const char *end) { | 1662 | 34.7k | UNUSED_P(enc); | 1663 | 34.7k | switch ((end - ptr) / MINBPC(enc)) { | 1664 | 3.18k | case 2: | 1665 | 3.18k | if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) { | 1666 | 1.27k | switch (BYTE_TO_ASCII(enc, ptr)) { | 1667 | 549 | case ASCII_l: | 1668 | 549 | return ASCII_LT; | 1669 | 280 | case ASCII_g: | 1670 | 280 | return ASCII_GT; | 1671 | 1.27k | } | 1672 | 1.27k | } | 1673 | 2.35k | break; | 1674 | 2.79k | case 3: | 1675 | 2.79k | if (CHAR_MATCHES(enc, ptr, ASCII_a)) { | 1676 | 1.38k | ptr += MINBPC(enc); | 1677 | 1.38k | if (CHAR_MATCHES(enc, ptr, ASCII_m)) { | 1678 | 579 | ptr += MINBPC(enc); | 1679 | 579 | if (CHAR_MATCHES(enc, ptr, ASCII_p)) | 1680 | 84 | return ASCII_AMP; | 1681 | 579 | } | 1682 | 1.38k | } | 1683 | 2.71k | break; | 1684 | 7.96k | case 4: | 1685 | 7.96k | switch (BYTE_TO_ASCII(enc, ptr)) { | 1686 | 3.42k | case ASCII_q: | 1687 | 3.42k | ptr += MINBPC(enc); | 1688 | 3.42k | if (CHAR_MATCHES(enc, ptr, ASCII_u)) { | 1689 | 2.41k | ptr += MINBPC(enc); | 1690 | 2.41k | if (CHAR_MATCHES(enc, ptr, ASCII_o)) { | 1691 | 1.41k | ptr += MINBPC(enc); | 1692 | 1.41k | if (CHAR_MATCHES(enc, ptr, ASCII_t)) | 1693 | 642 | return ASCII_QUOT; | 1694 | 1.41k | } | 1695 | 2.41k | } | 1696 | 2.78k | break; | 1697 | 2.78k | case ASCII_a: | 1698 | 2.73k | ptr += MINBPC(enc); | 1699 | 2.73k | if (CHAR_MATCHES(enc, ptr, ASCII_p)) { | 1700 | 1.94k | ptr += MINBPC(enc); | 1701 | 1.94k | if (CHAR_MATCHES(enc, ptr, ASCII_o)) { | 1702 | 1.04k | ptr += MINBPC(enc); | 1703 | 1.04k | if (CHAR_MATCHES(enc, ptr, ASCII_s)) | 1704 | 334 | return ASCII_APOS; | 1705 | 1.04k | } | 1706 | 1.94k | } | 1707 | 2.40k | break; | 1708 | 7.96k | } | 1709 | 34.7k | } | 1710 | 32.8k | return 0; | 1711 | 34.7k | } |
xmltok.c:big2_predefinedEntityName Line | Count | Source | 1661 | 123k | const char *end) { | 1662 | 123k | UNUSED_P(enc); | 1663 | 123k | switch ((end - ptr) / MINBPC(enc)) { | 1664 | 4.58k | case 2: | 1665 | 4.58k | if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) { | 1666 | 1.47k | switch (BYTE_TO_ASCII(enc, ptr)) { | 1667 | 400 | case ASCII_l: | 1668 | 400 | return ASCII_LT; | 1669 | 309 | case ASCII_g: | 1670 | 309 | return ASCII_GT; | 1671 | 1.47k | } | 1672 | 1.47k | } | 1673 | 3.87k | break; | 1674 | 5.62k | case 3: | 1675 | 5.62k | if (CHAR_MATCHES(enc, ptr, ASCII_a)) { | 1676 | 5.01k | ptr += MINBPC(enc); | 1677 | 5.01k | if (CHAR_MATCHES(enc, ptr, ASCII_m)) { | 1678 | 1.84k | ptr += MINBPC(enc); | 1679 | 1.84k | if (CHAR_MATCHES(enc, ptr, ASCII_p)) | 1680 | 879 | return ASCII_AMP; | 1681 | 1.84k | } | 1682 | 5.01k | } | 1683 | 4.74k | break; | 1684 | 10.6k | case 4: | 1685 | 10.6k | switch (BYTE_TO_ASCII(enc, ptr)) { | 1686 | 4.21k | case ASCII_q: | 1687 | 4.21k | ptr += MINBPC(enc); | 1688 | 4.21k | if (CHAR_MATCHES(enc, ptr, ASCII_u)) { | 1689 | 3.23k | ptr += MINBPC(enc); | 1690 | 3.23k | if (CHAR_MATCHES(enc, ptr, ASCII_o)) { | 1691 | 1.55k | ptr += MINBPC(enc); | 1692 | 1.55k | if (CHAR_MATCHES(enc, ptr, ASCII_t)) | 1693 | 874 | return ASCII_QUOT; | 1694 | 1.55k | } | 1695 | 3.23k | } | 1696 | 3.34k | break; | 1697 | 5.44k | case ASCII_a: | 1698 | 5.44k | ptr += MINBPC(enc); | 1699 | 5.44k | if (CHAR_MATCHES(enc, ptr, ASCII_p)) { | 1700 | 4.82k | ptr += MINBPC(enc); | 1701 | 4.82k | if (CHAR_MATCHES(enc, ptr, ASCII_o)) { | 1702 | 2.51k | ptr += MINBPC(enc); | 1703 | 2.51k | if (CHAR_MATCHES(enc, ptr, ASCII_s)) | 1704 | 307 | return ASCII_APOS; | 1705 | 2.51k | } | 1706 | 4.82k | } | 1707 | 5.14k | break; | 1708 | 10.6k | } | 1709 | 123k | } | 1710 | 120k | return 0; | 1711 | 123k | } |
|
1712 | | |
1713 | | static int PTRCALL |
1714 | | PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, |
1715 | 1.33M | const char *end1, const char *ptr2) { |
1716 | 1.33M | UNUSED_P(enc); |
1717 | 6.39M | for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { |
1718 | 5.60M | if (end1 - ptr1 < MINBPC(enc)) { |
1719 | | /* This line cannot be executed. The incoming data has already |
1720 | | * been tokenized once, so incomplete characters like this have |
1721 | | * already been eliminated from the input. Retaining the |
1722 | | * paranoia check is still valuable, however. |
1723 | | */ |
1724 | 291 | return 0; /* LCOV_EXCL_LINE */ |
1725 | 291 | } |
1726 | 5.60M | if (! CHAR_MATCHES(enc, ptr1, *ptr2)) |
1727 | 546k | return 0; |
1728 | 5.60M | } |
1729 | 789k | return ptr1 == end1; |
1730 | 1.33M | } xmltok.c:normal_nameMatchesAscii Line | Count | Source | 1715 | 685k | const char *end1, const char *ptr2) { | 1716 | 685k | UNUSED_P(enc); | 1717 | 4.05M | for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { | 1718 | 3.51M | if (end1 - ptr1 < MINBPC(enc)) { | 1719 | | /* This line cannot be executed. The incoming data has already | 1720 | | * been tokenized once, so incomplete characters like this have | 1721 | | * already been eliminated from the input. Retaining the | 1722 | | * paranoia check is still valuable, however. | 1723 | | */ | 1724 | 125 | return 0; /* LCOV_EXCL_LINE */ | 1725 | 125 | } | 1726 | 3.51M | if (! CHAR_MATCHES(enc, ptr1, *ptr2)) | 1727 | 145k | return 0; | 1728 | 3.51M | } | 1729 | 540k | return ptr1 == end1; | 1730 | 685k | } |
xmltok.c:little2_nameMatchesAscii Line | Count | Source | 1715 | 98.7k | const char *end1, const char *ptr2) { | 1716 | 98.7k | UNUSED_P(enc); | 1717 | 430k | for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { | 1718 | 380k | if (end1 - ptr1 < MINBPC(enc)) { | 1719 | | /* This line cannot be executed. The incoming data has already | 1720 | | * been tokenized once, so incomplete characters like this have | 1721 | | * already been eliminated from the input. Retaining the | 1722 | | * paranoia check is still valuable, however. | 1723 | | */ | 1724 | 100 | return 0; /* LCOV_EXCL_LINE */ | 1725 | 100 | } | 1726 | 380k | if (! CHAR_MATCHES(enc, ptr1, *ptr2)) | 1727 | 48.2k | return 0; | 1728 | 380k | } | 1729 | 50.3k | return ptr1 == end1; | 1730 | 98.7k | } |
xmltok.c:big2_nameMatchesAscii Line | Count | Source | 1715 | 552k | const char *end1, const char *ptr2) { | 1716 | 552k | UNUSED_P(enc); | 1717 | 1.91M | for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { | 1718 | 1.71M | if (end1 - ptr1 < MINBPC(enc)) { | 1719 | | /* This line cannot be executed. The incoming data has already | 1720 | | * been tokenized once, so incomplete characters like this have | 1721 | | * already been eliminated from the input. Retaining the | 1722 | | * paranoia check is still valuable, however. | 1723 | | */ | 1724 | 66 | return 0; /* LCOV_EXCL_LINE */ | 1725 | 66 | } | 1726 | 1.71M | if (! CHAR_MATCHES(enc, ptr1, *ptr2)) | 1727 | 353k | return 0; | 1728 | 1.71M | } | 1729 | 199k | return ptr1 == end1; | 1730 | 552k | } |
|
1731 | | |
1732 | | static int PTRFASTCALL |
1733 | 23.6M | PREFIX(nameLength)(const ENCODING *enc, const char *ptr) { |
1734 | 23.6M | const char *start = ptr; |
1735 | 150M | for (;;) { |
1736 | 150M | switch (BYTE_TYPE(enc, ptr)) { |
1737 | 0 | # define LEAD_CASE(n) \ |
1738 | 10.5M | case BT_LEAD##n: \ |
1739 | 10.5M | ptr += n; /* NOTE: The encoding has already been validated. */ \ |
1740 | 10.5M | break; |
1741 | 10.3M | LEAD_CASE(2) |
1742 | 242k | LEAD_CASE(3) |
1743 | 0 | LEAD_CASE(4) |
1744 | 0 | # undef LEAD_CASE |
1745 | 631k | case BT_NONASCII: |
1746 | 58.6M | case BT_NMSTRT: |
1747 | 58.6M | # ifdef XML_NS |
1748 | 58.8M | case BT_COLON: |
1749 | 58.8M | # endif |
1750 | 114M | case BT_HEX: |
1751 | 115M | case BT_DIGIT: |
1752 | 116M | case BT_NAME: |
1753 | 116M | case BT_MINUS: |
1754 | 116M | ptr += MINBPC(enc); |
1755 | 116M | break; |
1756 | 23.6M | default: |
1757 | 23.6M | return (int)(ptr - start); |
1758 | 150M | } |
1759 | 150M | } |
1760 | 23.6M | } xmltok.c:normal_nameLength Line | Count | Source | 1733 | 23.0M | PREFIX(nameLength)(const ENCODING *enc, const char *ptr) { | 1734 | 23.0M | const char *start = ptr; | 1735 | 148M | for (;;) { | 1736 | 148M | switch (BYTE_TYPE(enc, ptr)) { | 1737 | 0 | # define LEAD_CASE(n) \ | 1738 | 0 | case BT_LEAD##n: \ | 1739 | 0 | ptr += n; /* NOTE: The encoding has already been validated. */ \ | 1740 | 0 | break; | 1741 | 10.3M | LEAD_CASE(2) | 1742 | 242k | LEAD_CASE(3) | 1743 | 0 | LEAD_CASE(4) | 1744 | 0 | # undef LEAD_CASE | 1745 | 0 | case BT_NONASCII: | 1746 | 57.3M | case BT_NMSTRT: | 1747 | 57.3M | # ifdef XML_NS | 1748 | 57.5M | case BT_COLON: | 1749 | 57.5M | # endif | 1750 | 112M | case BT_HEX: | 1751 | 114M | case BT_DIGIT: | 1752 | 114M | case BT_NAME: | 1753 | 114M | case BT_MINUS: | 1754 | 114M | ptr += MINBPC(enc); | 1755 | 114M | break; | 1756 | 23.0M | default: | 1757 | 23.0M | return (int)(ptr - start); | 1758 | 148M | } | 1759 | 148M | } | 1760 | 23.0M | } |
xmltok.c:little2_nameLength Line | Count | Source | 1733 | 306k | PREFIX(nameLength)(const ENCODING *enc, const char *ptr) { | 1734 | 306k | const char *start = ptr; | 1735 | 1.11M | for (;;) { | 1736 | 1.11M | switch (BYTE_TYPE(enc, ptr)) { | 1737 | 0 | # define LEAD_CASE(n) \ | 1738 | 0 | case BT_LEAD##n: \ | 1739 | 0 | ptr += n; /* NOTE: The encoding has already been validated. */ \ | 1740 | 0 | break; | 1741 | 0 | LEAD_CASE(2) | 1742 | 0 | LEAD_CASE(3) | 1743 | 0 | LEAD_CASE(4) | 1744 | 0 | # undef LEAD_CASE | 1745 | 459k | case BT_NONASCII: | 1746 | 775k | case BT_NMSTRT: | 1747 | 775k | # ifdef XML_NS | 1748 | 777k | case BT_COLON: | 1749 | 777k | # endif | 1750 | 805k | case BT_HEX: | 1751 | 810k | case BT_DIGIT: | 1752 | 810k | case BT_NAME: | 1753 | 812k | case BT_MINUS: | 1754 | 812k | ptr += MINBPC(enc); | 1755 | 812k | break; | 1756 | 306k | default: | 1757 | 306k | return (int)(ptr - start); | 1758 | 1.11M | } | 1759 | 1.11M | } | 1760 | 306k | } |
Line | Count | Source | 1733 | 364k | PREFIX(nameLength)(const ENCODING *enc, const char *ptr) { | 1734 | 364k | const char *start = ptr; | 1735 | 1.04M | for (;;) { | 1736 | 1.04M | switch (BYTE_TYPE(enc, ptr)) { | 1737 | 0 | # define LEAD_CASE(n) \ | 1738 | 0 | case BT_LEAD##n: \ | 1739 | 0 | ptr += n; /* NOTE: The encoding has already been validated. */ \ | 1740 | 0 | break; | 1741 | 0 | LEAD_CASE(2) | 1742 | 0 | LEAD_CASE(3) | 1743 | 0 | LEAD_CASE(4) | 1744 | 0 | # undef LEAD_CASE | 1745 | 171k | case BT_NONASCII: | 1746 | 508k | case BT_NMSTRT: | 1747 | 508k | # ifdef XML_NS | 1748 | 508k | case BT_COLON: | 1749 | 508k | # endif | 1750 | 682k | case BT_HEX: | 1751 | 684k | case BT_DIGIT: | 1752 | 684k | case BT_NAME: | 1753 | 685k | case BT_MINUS: | 1754 | 685k | ptr += MINBPC(enc); | 1755 | 685k | break; | 1756 | 364k | default: | 1757 | 364k | return (int)(ptr - start); | 1758 | 1.04M | } | 1759 | 1.04M | } | 1760 | 364k | } |
|
1761 | | |
1762 | | static const char *PTRFASTCALL |
1763 | 319k | PREFIX(skipS)(const ENCODING *enc, const char *ptr) { |
1764 | 601k | for (;;) { |
1765 | 601k | switch (BYTE_TYPE(enc, ptr)) { |
1766 | 136k | case BT_LF: |
1767 | 253k | case BT_CR: |
1768 | 281k | case BT_S: |
1769 | 281k | ptr += MINBPC(enc); |
1770 | 281k | break; |
1771 | 319k | default: |
1772 | 319k | return ptr; |
1773 | 601k | } |
1774 | 601k | } |
1775 | 319k | } Line | Count | Source | 1763 | 73.6k | PREFIX(skipS)(const ENCODING *enc, const char *ptr) { | 1764 | 138k | for (;;) { | 1765 | 138k | switch (BYTE_TYPE(enc, ptr)) { | 1766 | 46.0k | case BT_LF: | 1767 | 50.7k | case BT_CR: | 1768 | 64.7k | case BT_S: | 1769 | 64.7k | ptr += MINBPC(enc); | 1770 | 64.7k | break; | 1771 | 73.6k | default: | 1772 | 73.6k | return ptr; | 1773 | 138k | } | 1774 | 138k | } | 1775 | 73.6k | } |
Line | Count | Source | 1763 | 77.4k | PREFIX(skipS)(const ENCODING *enc, const char *ptr) { | 1764 | 152k | for (;;) { | 1765 | 152k | switch (BYTE_TYPE(enc, ptr)) { | 1766 | 27.8k | case BT_LF: | 1767 | 63.4k | case BT_CR: | 1768 | 74.9k | case BT_S: | 1769 | 74.9k | ptr += MINBPC(enc); | 1770 | 74.9k | break; | 1771 | 77.4k | default: | 1772 | 77.4k | return ptr; | 1773 | 152k | } | 1774 | 152k | } | 1775 | 77.4k | } |
Line | Count | Source | 1763 | 168k | PREFIX(skipS)(const ENCODING *enc, const char *ptr) { | 1764 | 310k | for (;;) { | 1765 | 310k | switch (BYTE_TYPE(enc, ptr)) { | 1766 | 62.3k | case BT_LF: | 1767 | 138k | case BT_CR: | 1768 | 141k | case BT_S: | 1769 | 141k | ptr += MINBPC(enc); | 1770 | 141k | break; | 1771 | 168k | default: | 1772 | 168k | return ptr; | 1773 | 310k | } | 1774 | 310k | } | 1775 | 168k | } |
|
1776 | | |
1777 | | static void PTRCALL |
1778 | | PREFIX(updatePosition)(const ENCODING *enc, const char *ptr, const char *end, |
1779 | 37.0M | POSITION *pos) { |
1780 | 752M | while (HAS_CHAR(enc, ptr, end)) { |
1781 | 752M | switch (BYTE_TYPE(enc, ptr)) { |
1782 | 0 | # define LEAD_CASE(n) \ |
1783 | 11.1M | case BT_LEAD##n: \ |
1784 | 11.1M | ptr += n; /* NOTE: The encoding has already been validated. */ \ |
1785 | 11.1M | pos->columnNumber++; \ |
1786 | 11.1M | break; |
1787 | 1.46M | LEAD_CASE(2) |
1788 | 1.88M | LEAD_CASE(3) |
1789 | 7.84M | LEAD_CASE(4) |
1790 | 0 | # undef LEAD_CASE |
1791 | 4.51M | case BT_LF: |
1792 | 4.51M | pos->columnNumber = 0; |
1793 | 4.51M | pos->lineNumber++; |
1794 | 4.51M | ptr += MINBPC(enc); |
1795 | 4.51M | break; |
1796 | 31.1M | case BT_CR: |
1797 | 31.1M | pos->lineNumber++; |
1798 | 31.1M | ptr += MINBPC(enc); |
1799 | 31.1M | if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF) |
1800 | 462k | ptr += MINBPC(enc); |
1801 | 31.1M | pos->columnNumber = 0; |
1802 | 31.1M | break; |
1803 | 705M | default: |
1804 | 705M | ptr += MINBPC(enc); |
1805 | 705M | pos->columnNumber++; |
1806 | 705M | break; |
1807 | 752M | } |
1808 | 752M | } |
1809 | 37.0M | } xmltok.c:normal_updatePosition Line | Count | Source | 1779 | 32.5M | POSITION *pos) { | 1780 | 544M | while (HAS_CHAR(enc, ptr, end)) { | 1781 | 544M | switch (BYTE_TYPE(enc, ptr)) { | 1782 | 0 | # define LEAD_CASE(n) \ | 1783 | 0 | case BT_LEAD##n: \ | 1784 | 0 | ptr += n; /* NOTE: The encoding has already been validated. */ \ | 1785 | 0 | pos->columnNumber++; \ | 1786 | 0 | break; | 1787 | 1.46M | LEAD_CASE(2) | 1788 | 1.88M | LEAD_CASE(3) | 1789 | 14.0k | LEAD_CASE(4) | 1790 | 0 | # undef LEAD_CASE | 1791 | 3.09M | case BT_LF: | 1792 | 3.09M | pos->columnNumber = 0; | 1793 | 3.09M | pos->lineNumber++; | 1794 | 3.09M | ptr += MINBPC(enc); | 1795 | 3.09M | break; | 1796 | 27.7M | case BT_CR: | 1797 | 27.7M | pos->lineNumber++; | 1798 | 27.7M | ptr += MINBPC(enc); | 1799 | 27.7M | if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF) | 1800 | 15.6k | ptr += MINBPC(enc); | 1801 | 27.7M | pos->columnNumber = 0; | 1802 | 27.7M | break; | 1803 | 509M | default: | 1804 | 509M | ptr += MINBPC(enc); | 1805 | 509M | pos->columnNumber++; | 1806 | 509M | break; | 1807 | 544M | } | 1808 | 544M | } | 1809 | 32.5M | } |
xmltok.c:little2_updatePosition Line | Count | Source | 1779 | 2.06M | POSITION *pos) { | 1780 | 102M | while (HAS_CHAR(enc, ptr, end)) { | 1781 | 102M | switch (BYTE_TYPE(enc, ptr)) { | 1782 | 0 | # define LEAD_CASE(n) \ | 1783 | 0 | case BT_LEAD##n: \ | 1784 | 0 | ptr += n; /* NOTE: The encoding has already been validated. */ \ | 1785 | 0 | pos->columnNumber++; \ | 1786 | 0 | break; | 1787 | 0 | LEAD_CASE(2) | 1788 | 0 | LEAD_CASE(3) | 1789 | 3.77M | LEAD_CASE(4) | 1790 | 0 | # undef LEAD_CASE | 1791 | 665k | case BT_LF: | 1792 | 665k | pos->columnNumber = 0; | 1793 | 665k | pos->lineNumber++; | 1794 | 665k | ptr += MINBPC(enc); | 1795 | 665k | break; | 1796 | 1.45M | case BT_CR: | 1797 | 1.45M | pos->lineNumber++; | 1798 | 1.45M | ptr += MINBPC(enc); | 1799 | 1.45M | if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF) | 1800 | 64.7k | ptr += MINBPC(enc); | 1801 | 1.45M | pos->columnNumber = 0; | 1802 | 1.45M | break; | 1803 | 96.1M | default: | 1804 | 96.1M | ptr += MINBPC(enc); | 1805 | 96.1M | pos->columnNumber++; | 1806 | 96.1M | break; | 1807 | 102M | } | 1808 | 102M | } | 1809 | 2.06M | } |
xmltok.c:big2_updatePosition Line | Count | Source | 1779 | 2.39M | POSITION *pos) { | 1780 | 106M | while (HAS_CHAR(enc, ptr, end)) { | 1781 | 106M | switch (BYTE_TYPE(enc, ptr)) { | 1782 | 0 | # define LEAD_CASE(n) \ | 1783 | 0 | case BT_LEAD##n: \ | 1784 | 0 | ptr += n; /* NOTE: The encoding has already been validated. */ \ | 1785 | 0 | pos->columnNumber++; \ | 1786 | 0 | break; | 1787 | 0 | LEAD_CASE(2) | 1788 | 0 | LEAD_CASE(3) | 1789 | 4.05M | LEAD_CASE(4) | 1790 | 0 | # undef LEAD_CASE | 1791 | 753k | case BT_LF: | 1792 | 753k | pos->columnNumber = 0; | 1793 | 753k | pos->lineNumber++; | 1794 | 753k | ptr += MINBPC(enc); | 1795 | 753k | break; | 1796 | 1.96M | case BT_CR: | 1797 | 1.96M | pos->lineNumber++; | 1798 | 1.96M | ptr += MINBPC(enc); | 1799 | 1.96M | if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF) | 1800 | 382k | ptr += MINBPC(enc); | 1801 | 1.96M | pos->columnNumber = 0; | 1802 | 1.96M | break; | 1803 | 99.3M | default: | 1804 | 99.3M | ptr += MINBPC(enc); | 1805 | 99.3M | pos->columnNumber++; | 1806 | 99.3M | break; | 1807 | 106M | } | 1808 | 106M | } | 1809 | 2.39M | } |
|
1810 | | |
1811 | | # undef DO_LEAD_CASE |
1812 | | # undef MULTIBYTE_CASES |
1813 | | # undef INVALID_CASES |
1814 | | # undef CHECK_NAME_CASE |
1815 | | # undef CHECK_NAME_CASES |
1816 | | # undef CHECK_NMSTRT_CASE |
1817 | | # undef CHECK_NMSTRT_CASES |
1818 | | |
1819 | | #endif /* XML_TOK_IMPL_C */ |