/src/CMake/Utilities/cmexpat/lib/xmltok.c
| Line | Count | Source (jump to first uncovered line) | 
| 1 |  | /* | 
| 2 |  |                             __  __            _ | 
| 3 |  |                          ___\ \/ /_ __   __ _| |_ | 
| 4 |  |                         / _ \\  /| '_ \ / _` | __| | 
| 5 |  |                        |  __//  \| |_) | (_| | |_ | 
| 6 |  |                         \___/_/\_\ .__/ \__,_|\__| | 
| 7 |  |                                  |_| XML parser | 
| 8 |  |  | 
| 9 |  |    Copyright (c) 1997-2000 Thai Open Source Software Center Ltd | 
| 10 |  |    Copyright (c) 2000      Clark Cooper <coopercc@users.sourceforge.net> | 
| 11 |  |    Copyright (c) 2001-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> | 
| 12 |  |    Copyright (c) 2002      Greg Stein <gstein@users.sourceforge.net> | 
| 13 |  |    Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net> | 
| 14 |  |    Copyright (c) 2005-2009 Steven Solie <steven@solie.ca> | 
| 15 |  |    Copyright (c) 2016-2022 Sebastian Pipping <sebastian@pipping.org> | 
| 16 |  |    Copyright (c) 2016      Pascal Cuoq <cuoq@trust-in-soft.com> | 
| 17 |  |    Copyright (c) 2016      Don Lewis <truckman@apache.org> | 
| 18 |  |    Copyright (c) 2017      Rhodri James <rhodri@wildebeest.org.uk> | 
| 19 |  |    Copyright (c) 2017      Alexander Bluhm <alexander.bluhm@gmx.net> | 
| 20 |  |    Copyright (c) 2017      Benbuck Nason <bnason@netflix.com> | 
| 21 |  |    Copyright (c) 2017      José Gutiérrez de la Concha <jose@zeroc.com> | 
| 22 |  |    Copyright (c) 2019      David Loffredo <loffredo@steptools.com> | 
| 23 |  |    Copyright (c) 2021      Dong-hee Na <donghee.na@python.org> | 
| 24 |  |    Licensed under the MIT license: | 
| 25 |  |  | 
| 26 |  |    Permission is  hereby granted,  free of charge,  to any  person obtaining | 
| 27 |  |    a  copy  of  this  software   and  associated  documentation  files  (the | 
| 28 |  |    "Software"),  to  deal in  the  Software  without restriction,  including | 
| 29 |  |    without  limitation the  rights  to use,  copy,  modify, merge,  publish, | 
| 30 |  |    distribute, sublicense, and/or sell copies of the Software, and to permit | 
| 31 |  |    persons  to whom  the Software  is  furnished to  do so,  subject to  the | 
| 32 |  |    following conditions: | 
| 33 |  |  | 
| 34 |  |    The above copyright  notice and this permission notice  shall be included | 
| 35 |  |    in all copies or substantial portions of the Software. | 
| 36 |  |  | 
| 37 |  |    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND, | 
| 38 |  |    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF | 
| 39 |  |    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN | 
| 40 |  |    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, | 
| 41 |  |    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR | 
| 42 |  |    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE | 
| 43 |  |    USE OR OTHER DEALINGS IN THE SOFTWARE. | 
| 44 |  | */ | 
| 45 |  |  | 
| 46 |  | #include <expat_config.h> | 
| 47 |  |  | 
| 48 |  | #include <stddef.h> | 
| 49 |  | #include <string.h> /* memcpy */ | 
| 50 |  | #include <stdbool.h> | 
| 51 |  |  | 
| 52 |  | #ifdef _WIN32 | 
| 53 |  | #  include "winconfig.h" | 
| 54 |  | #endif | 
| 55 |  |  | 
| 56 |  | #include "expat_external.h" | 
| 57 |  | #include "internal.h" | 
| 58 |  | #include "xmltok.h" | 
| 59 |  | #include "nametab.h" | 
| 60 |  |  | 
/* Optional extra scanner slot used inside VTABLE1: when XML_DTD is defined it
   appends ", PREFIX(ignoreSectionTok)" to the first brace group; otherwise it
   expands to nothing. */
#ifdef XML_DTD
#  define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok)
#else
#  define IGNORE_SECTION_TOK_VTABLE /* as nothing */
#endif

/* Positional initializer list of PREFIX()-qualified functions: a brace group
   of tokenizers (prolog, content, CDATA section and, with XML_DTD, ignore
   section), a brace group of literal tokenizers (attribute value, entity
   value), then the remaining helpers.  The order must match the member order
   of the structure being initialized. */
#define VTABLE1                                                                \
  {PREFIX(prologTok), PREFIX(contentTok),                                      \
   PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE},                         \
      {PREFIX(attributeValueTok), PREFIX(entityValueTok)},                     \
      PREFIX(nameMatchesAscii), PREFIX(nameLength), PREFIX(skipS),             \
      PREFIX(getAtts), PREFIX(charRefNumber), PREFIX(predefinedEntityName),    \
      PREFIX(updatePosition), PREFIX(isPublicId)

/* VTABLE1 followed by the PREFIX()-qualified toUtf8/toUtf16 converters. */
#define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)
| 76 |  |  | 
/* Test the naming bit for a 16-bit character given as high byte `hi` and low
   byte `lo`.  pages[hi] selects a group of 8 words in namingBitmap, the top
   three bits of `lo` select the word within the group and the low five bits
   of `lo` select the bit.  Yields non-zero when the bit is set.
   `pages` is parenthesized so that an expression argument (e.g. a pointer
   offset) is indexed as a whole, matching UTF8_GET_NAMING2/3. */
#define UCS2_GET_NAMING(pages, hi, lo)                                         \
  (namingBitmap[((pages)[hi] << 3) + ((lo) >> 5)] & (1u << ((lo)&0x1F)))
| 79 |  |  | 
/* A 2-byte UTF-8 representation splits the character's 11 bits between
   the bottom 5 and 6 bits of the bytes.  We need 8 bits to index into
   pages, 3 bits to add to that index and 5 bits to generate the mask.
   Here the page index comes from bits 2..4 of the first byte, the 3-bit
   word offset from the low 2 bits of the first byte plus bit 5 of the
   second byte, and the mask from the low 5 bits of the second byte.
   `byte` must point at unsigned bytes; the result is non-zero when the
   naming bit is set.
*/
#define UTF8_GET_NAMING2(pages, byte)                                          \
  (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3)                         \
                + ((((byte)[0]) & 3) << 1) + ((((byte)[1]) >> 5) & 1)]         \
   & (1u << (((byte)[1]) & 0x1F)))

/* A 3-byte UTF-8 representation splits the character's 16 bits between
   the bottom 4, 6 and 6 bits of the bytes.  We need 8 bits to index
   into pages, 3 bits to add to that index and 5 bits to generate the
   mask.
   Here the page index is the low 4 bits of the first byte followed by
   bits 2..5 of the second byte, the 3-bit word offset is the low 2 bits
   of the second byte plus bit 5 of the third byte, and the mask comes
   from the low 5 bits of the third byte.
*/
#define UTF8_GET_NAMING3(pages, byte)                                          \
  (namingBitmap                                                                \
       [((pages)[((((byte)[0]) & 0xF) << 4) + ((((byte)[1]) >> 2) & 0xF)]      \
         << 3)                                                                 \
        + ((((byte)[1]) & 3) << 1) + ((((byte)[2]) >> 5) & 1)]                 \
   & (1u << (((byte)[2]) & 0x1F)))
| 100 |  |  | 
| 101 |  | /* Detection of invalid UTF-8 sequences is based on Table 3.1B | 
| 102 |  |    of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/ | 
| 103 |  |    with the additional restriction of not allowing the Unicode | 
| 104 |  |    code points 0xFFFF and 0xFFFE (sequences EF,BF,BF and EF,BF,BE). | 
| 105 |  |    Implementation details: | 
| 106 |  |      (A & 0x80) == 0     means A < 0x80 | 
| 107 |  |    and | 
| 108 |  |      (A & 0xC0) == 0xC0  means A > 0xBF | 
| 109 |  | */ | 
| 110 |  |  | 
/* Each UTF8_INVALIDn(p) yields non-zero when the n bytes starting at p do not
   form an acceptable n-byte UTF-8 sequence.  p must point at unsigned bytes
   (callers cast to const unsigned char *).  The lead byte is read as (p)[0]
   rather than *p so that an expression argument such as `buf + 1` is
   dereferenced as a whole. */

/* 2 bytes: lead must be at least 0xC2 (0xC0/0xC1 would be overlong) and the
   second byte must be a continuation byte (0x80..0xBF). */
#define UTF8_INVALID2(p)                                                       \
  ((p)[0] < 0xC2 || ((p)[1] & 0x80) == 0 || ((p)[1] & 0xC0) == 0xC0)

/* 3 bytes: the third byte must be a continuation byte, except that after
   EF,BF anything above 0xBD is rejected (excludes U+FFFE and U+FFFF).
   The second byte must be 0xA0..0xBF after E0 (no overlong forms),
   0x80..0x9F after ED (no surrogates) and 0x80..0xBF otherwise. */
#define UTF8_INVALID3(p)                                                       \
  (((p)[2] & 0x80) == 0                                                        \
   || ((p)[0] == 0xEF && (p)[1] == 0xBF ? (p)[2] > 0xBD                        \
                                        : ((p)[2] & 0xC0) == 0xC0)             \
   || ((p)[0] == 0xE0                                                          \
           ? (p)[1] < 0xA0 || ((p)[1] & 0xC0) == 0xC0                          \
           : ((p)[1] & 0x80) == 0                                              \
                 || ((p)[0] == 0xED ? (p)[1] > 0x9F                            \
                                    : ((p)[1] & 0xC0) == 0xC0)))

/* 4 bytes: the third and fourth bytes must be continuation bytes.  The second
   byte must be 0x90..0xBF after F0 (no overlong forms), 0x80..0x8F after F4
   (nothing above U+10FFFF) and 0x80..0xBF otherwise. */
#define UTF8_INVALID4(p)                                                       \
  (((p)[3] & 0x80) == 0 || ((p)[3] & 0xC0) == 0xC0 || ((p)[2] & 0x80) == 0     \
   || ((p)[2] & 0xC0) == 0xC0                                                  \
   || ((p)[0] == 0xF0                                                          \
           ? (p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0                          \
           : ((p)[1] & 0x80) == 0                                              \
                 || ((p)[0] == 0xF4 ? (p)[1] > 0x8F                            \
                                    : ((p)[1] & 0xC0) == 0xC0)))
| 130 |  |  | 
| 131 |  | static int PTRFASTCALL | 
| 132 | 142 | isNever(const ENCODING *enc, const char *p) { | 
| 133 | 142 |   UNUSED_P(enc); | 
| 134 | 142 |   UNUSED_P(p); | 
| 135 | 142 |   return 0; | 
| 136 | 142 | } | 
| 137 |  |  | 
| 138 |  | static int PTRFASTCALL | 
| 139 | 13.4M | utf8_isName2(const ENCODING *enc, const char *p) { | 
| 140 | 13.4M |   UNUSED_P(enc); | 
| 141 | 13.4M |   return UTF8_GET_NAMING2(namePages, (const unsigned char *)p); | 
| 142 | 13.4M | } | 
| 143 |  |  | 
| 144 |  | static int PTRFASTCALL | 
| 145 | 100k | utf8_isName3(const ENCODING *enc, const char *p) { | 
| 146 | 100k |   UNUSED_P(enc); | 
| 147 | 100k |   return UTF8_GET_NAMING3(namePages, (const unsigned char *)p); | 
| 148 | 100k | } | 
| 149 |  |  | 
| 150 |  | #define utf8_isName4 isNever | 
| 151 |  |  | 
| 152 |  | static int PTRFASTCALL | 
| 153 | 49.6k | utf8_isNmstrt2(const ENCODING *enc, const char *p) { | 
| 154 | 49.6k |   UNUSED_P(enc); | 
| 155 | 49.6k |   return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p); | 
| 156 | 49.6k | } | 
| 157 |  |  | 
| 158 |  | static int PTRFASTCALL | 
| 159 | 15.6k | utf8_isNmstrt3(const ENCODING *enc, const char *p) { | 
| 160 | 15.6k |   UNUSED_P(enc); | 
| 161 | 15.6k |   return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p); | 
| 162 | 15.6k | } | 
| 163 |  |  | 
| 164 |  | #define utf8_isNmstrt4 isNever | 
| 165 |  |  | 
| 166 |  | static int PTRFASTCALL | 
| 167 | 133M | utf8_isInvalid2(const ENCODING *enc, const char *p) { | 
| 168 | 133M |   UNUSED_P(enc); | 
| 169 | 133M |   return UTF8_INVALID2((const unsigned char *)p); | 
| 170 | 133M | } | 
| 171 |  |  | 
| 172 |  | static int PTRFASTCALL | 
| 173 | 391k | utf8_isInvalid3(const ENCODING *enc, const char *p) { | 
| 174 | 391k |   UNUSED_P(enc); | 
| 175 | 391k |   return UTF8_INVALID3((const unsigned char *)p); | 
| 176 | 391k | } | 
| 177 |  |  | 
| 178 |  | static int PTRFASTCALL | 
| 179 | 87.8k | utf8_isInvalid4(const ENCODING *enc, const char *p) { | 
| 180 | 87.8k |   UNUSED_P(enc); | 
| 181 | 87.8k |   return UTF8_INVALID4((const unsigned char *)p); | 
| 182 | 87.8k | } | 
| 183 |  |  | 
/* An ENCODING extended with a per-byte classification table and per-encoding
   character predicates.  `enc` must stay the first member: code in this file
   casts an ENCODING pointer to a struct normal_encoding pointer
   (AS_NORMAL_ENCODING).  Instances are initialized positionally, so member
   order must not change. */
struct normal_encoding {
  ENCODING enc;
  /* Byte type for each possible byte value; indexed with the byte cast to
     unsigned char (see SB_BYTE_TYPE). */
  unsigned char type[256];
#ifdef XML_MIN_SIZE
  /* Indirections used by the BYTE_TYPE, IS_NAME_CHAR_MINBPC,
     IS_NMSTRT_CHAR_MINBPC, BYTE_TO_ASCII and CHAR_MATCHES macros when
     XML_MIN_SIZE is defined. */
  int(PTRFASTCALL *byteType)(const ENCODING *, const char *);
  int(PTRFASTCALL *isNameMin)(const ENCODING *, const char *);
  int(PTRFASTCALL *isNmstrtMin)(const ENCODING *, const char *);
  int(PTRFASTCALL *byteToAscii)(const ENCODING *, const char *);
  int(PTRCALL *charMatches)(const ENCODING *, const char *, int);
#endif /* XML_MIN_SIZE */
  /* Name-character tests for 2-, 3- and 4-byte sequences (IS_NAME_CHAR). */
  int(PTRFASTCALL *isName2)(const ENCODING *, const char *);
  int(PTRFASTCALL *isName3)(const ENCODING *, const char *);
  int(PTRFASTCALL *isName4)(const ENCODING *, const char *);
  /* Name-start-character tests (IS_NMSTRT_CHAR). */
  int(PTRFASTCALL *isNmstrt2)(const ENCODING *, const char *);
  int(PTRFASTCALL *isNmstrt3)(const ENCODING *, const char *);
  int(PTRFASTCALL *isNmstrt4)(const ENCODING *, const char *);
  /* Malformed-sequence tests (IS_INVALID_CHAR).  These nine predicate slots
     are NULL for encodings initialized with NULL_VTABLE. */
  int(PTRFASTCALL *isInvalid2)(const ENCODING *, const char *);
  int(PTRFASTCALL *isInvalid3)(const ENCODING *, const char *);
  int(PTRFASTCALL *isInvalid4)(const ENCODING *, const char *);
};
| 204 |  |  | 
/* View an ENCODING pointer as the struct normal_encoding that contains it
   (the ENCODING is that struct's first member).  Keeps the const qualifier. */
#define AS_NORMAL_ENCODING(enc) ((const struct normal_encoding *)(enc))

/* STANDARD_VTABLE(E): initializers for the XML_MIN_SIZE-only members of
   struct normal_encoding, built from functions named with prefix E.  The
   expansion ends with a comma so it can be followed directly by
   NORMAL_VTABLE or NULL_VTABLE.  Without XML_MIN_SIZE it expands to nothing,
   matching the absent members. */
#ifdef XML_MIN_SIZE

#  define STANDARD_VTABLE(E)                                                   \
    E##byteType, E##isNameMin, E##isNmstrtMin, E##byteToAscii, E##charMatches,

#else

#  define STANDARD_VTABLE(E) /* as nothing */

#endif

/* Initializers for the nine isNameN / isNmstrtN / isInvalidN members, using
   functions (or macro aliases) named with prefix E. */
#define NORMAL_VTABLE(E)                                                       \
  E##isName2, E##isName3, E##isName4, E##isNmstrt2, E##isNmstrt3,              \
      E##isNmstrt4, E##isInvalid2, E##isInvalid3, E##isInvalid4

/* Same nine members, all set to NULL. */
#define NULL_VTABLE                                                            \
  /* isName2 */ NULL, /* isName3 */ NULL, /* isName4 */ NULL,                  \
      /* isNmstrt2 */ NULL, /* isNmstrt3 */ NULL, /* isNmstrt4 */ NULL,        \
      /* isInvalid2 */ NULL, /* isInvalid3 */ NULL, /* isInvalid4 */ NULL
| 226 |  |  | 
| 227 |  | static int FASTCALL checkCharRefNumber(int); | 
| 228 |  |  | 
| 229 |  | #include "xmltok_impl.h" | 
| 230 |  | #include "ascii.h" | 
| 231 |  |  | 
/* With XML_MIN_SIZE, STANDARD_VTABLE(sb_) needs sb_isNameMin and
   sb_isNmstrtMin; both are aliases for isNever, which always returns 0. */
#ifdef XML_MIN_SIZE
#  define sb_isNameMin isNever
#  define sb_isNmstrtMin isNever
#endif

/* MINBPC(enc): bytes per minimal code unit for the tokenizer instantiated
   below.  With XML_MIN_SIZE it is read from the encoding at run time;
   otherwise it is the constant 1 and enc is not evaluated. */
#ifdef XML_MIN_SIZE
#  define MINBPC(enc) ((enc)->minBytesPerChar)
#else
/* minimum bytes per character */
#  define MINBPC(enc) 1
#endif
| 243 |  |  | 
/* Look up the byte type of the byte at p in the encoding's 256-entry type
   table.  The byte is converted to unsigned char before indexing so that
   values >= 0x80 do not produce a negative index where char is signed.
   The table is only read, so enc is viewed through a pointer-to-const
   instead of casting the qualifier away. */
#define SB_BYTE_TYPE(enc, p)                                                   \
  (((const struct normal_encoding *)(enc))->type[(unsigned char)*(p)])

#ifdef XML_MIN_SIZE
/* Function form of SB_BYTE_TYPE, stored in the byteType slot. */
static int PTRFASTCALL
sb_byteType(const ENCODING *enc, const char *p) {
  return SB_BYTE_TYPE(enc, p);
}
/* With XML_MIN_SIZE the lookup is dispatched through the encoding. */
#  define BYTE_TYPE(enc, p) (AS_NORMAL_ENCODING(enc)->byteType(enc, p))
#else
/* Otherwise the table lookup is expanded inline. */
#  define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p)
#endif
| 256 |  |  | 
/* BYTE_TO_ASCII(enc, p): value of the byte at p.  With XML_MIN_SIZE it is
   dispatched through the encoding's byteToAscii slot (sb_byteToAscii for the
   single-byte encodings); otherwise it is a plain dereference. */
#ifdef XML_MIN_SIZE
#  define BYTE_TO_ASCII(enc, p) (AS_NORMAL_ENCODING(enc)->byteToAscii(enc, p))
static int PTRFASTCALL
sb_byteToAscii(const ENCODING *enc, const char *p) {
  UNUSED_P(enc);
  return *p;
}
#else
#  define BYTE_TO_ASCII(enc, p) (*(p))
#endif

/* Multi-byte character predicates: n (2, 3 or 4) is pasted onto the member
   name, selecting e.g. isName2 / isNmstrt3 / isInvalid4. */
#define IS_NAME_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isName##n(enc, p))
#define IS_NMSTRT_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isNmstrt##n(enc, p))
/* With XML_MIN_SIZE a NULL isInvalidN slot is treated as "not invalid".
   NOTE(review): the non-MIN_SIZE variant calls the slot unconditionally;
   presumably encodings built with NULL_VTABLE never classify a byte as a
   multi-byte lead, so the call is not reached for them -- confirm against
   the byte-type tables. */
#ifdef XML_MIN_SIZE
#  define IS_INVALID_CHAR(enc, p, n)                                           \
    (AS_NORMAL_ENCODING(enc)->isInvalid##n                                     \
     && AS_NORMAL_ENCODING(enc)->isInvalid##n(enc, p))
#else
#  define IS_INVALID_CHAR(enc, p, n)                                           \
    (AS_NORMAL_ENCODING(enc)->isInvalid##n(enc, p))
#endif

/* Name tests applied to a minimal code unit.  With XML_MIN_SIZE they go
   through the isNameMin / isNmstrtMin slots; otherwise they are the
   constant 0. */
#ifdef XML_MIN_SIZE
#  define IS_NAME_CHAR_MINBPC(enc, p)                                          \
    (AS_NORMAL_ENCODING(enc)->isNameMin(enc, p))
#  define IS_NMSTRT_CHAR_MINBPC(enc, p)                                        \
    (AS_NORMAL_ENCODING(enc)->isNmstrtMin(enc, p))
#else
#  define IS_NAME_CHAR_MINBPC(enc, p) (0)
#  define IS_NMSTRT_CHAR_MINBPC(enc, p) (0)
#endif
| 288 |  |  | 
/* CHAR_MATCHES(enc, p, c): non-zero when the byte at p equals c.
   With XML_MIN_SIZE the comparison is dispatched through the encoding's
   charMatches slot (sb_charMatches for the single-byte encodings);
   otherwise it is expanded inline and enc is not evaluated. */
#ifdef XML_MIN_SIZE
#  define CHAR_MATCHES(enc, p, c)                                              \
    (AS_NORMAL_ENCODING(enc)->charMatches(enc, p, c))
static int PTRCALL
sb_charMatches(const ENCODING *enc, const char *p, int c) {
  UNUSED_P(enc);
  return *p == c;
}
#else
/* c is an ASCII character; it is parenthesized so that an expression
   argument is compared as a whole */
#  define CHAR_MATCHES(enc, p, c) (*(p) == (c))
#endif
| 301 |  |  | 
/* Instantiate the tokenizer implementation for the single-byte ("normal")
   encodings: every PREFIX(x) in the included file becomes normal_x, and the
   helper macros defined above supply the encoding-specific pieces.
   XML_TOK_IMPL_C is defined only for the duration of the include. */
#define PREFIX(ident) normal_##ident
#define XML_TOK_IMPL_C
#include "xmltok_impl.c"
#undef XML_TOK_IMPL_C

/* Drop the per-instantiation helper macros.  PREFIX is deliberately not
   undefined here: the encoding tables below still expand VTABLE1. */
#undef MINBPC
#undef BYTE_TYPE
#undef BYTE_TO_ASCII
#undef CHAR_MATCHES
#undef IS_NAME_CHAR
#undef IS_NAME_CHAR_MINBPC
#undef IS_NMSTRT_CHAR
#undef IS_NMSTRT_CHAR_MINBPC
#undef IS_INVALID_CHAR
| 316 |  |  | 
/* Marker bits of the first byte of a UTF-8 sequence; OR-ed with the payload
   bits when a sequence is produced (e.g. UTF8_cval2 in latin1_toUtf8). */
enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */
       UTF8_cval1 = 0x00,
       UTF8_cval2 = 0xc0,
       UTF8_cval3 = 0xe0,
       UTF8_cval4 = 0xf0
};
| 323 |  |  | 
/* Move *fromLimRef backwards, if necessary, so that the range
   [from, *fromLimRef) does not end in the middle of a multi-byte UTF-8
   character.

   The range is scanned from its end.  Bytes that are neither ASCII nor a
   2/3/4-byte lead byte are stepped over and counted.  On reaching a lead
   byte, the sequence is complete when the lead plus the counted bytes reach
   the announced length: the limit is then placed right after that sequence.
   Otherwise the partial sequence is dropped and scanning continues.  An
   ASCII byte stops the scan with the limit right after it.  An empty range
   is left untouched. */
void
_INTERNAL_trim_to_complete_utf8_characters(const char *from,
                                           const char **fromLimRef) {
  const char *end = *fromLimRef;
  size_t seen = 0; /* bytes stepped over since the last reset */

  while (end > from) {
    const unsigned char last = (unsigned char)end[-1];
    size_t need; /* announced sequence length; 0 when `last` is no lead byte */

    if ((last & 0x80u) == 0x00u) {
      break; /* 1-byte character, matching 0b0xxxxxxx */
    }

    if ((last & 0xf8u) == 0xf0u) {
      need = 4; /* lead byte 0b11110xxx */
    } else if ((last & 0xf0u) == 0xe0u) {
      need = 3; /* lead byte 0b1110xxxx */
    } else if ((last & 0xe0u) == 0xc0u) {
      need = 2; /* lead byte 0b110xxxxx */
    } else {
      need = 0;
    }

    if (need != 0) {
      if (seen + 1 >= need) {
        /* Complete character: keep it by restoring the limit to its end. */
        end += need - 1;
        break;
      }
      /* Incomplete character: forget the bytes counted so far. */
      seen = 0;
    }

    end--;
    seen++;
  }

  *fromLimRef = end;
}
| 362 |  |  | 
| 363 |  | static enum XML_Convert_Result PTRCALL | 
| 364 |  | utf8_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, | 
| 365 | 53.1M |             char **toP, const char *toLim) { | 
| 366 | 53.1M |   bool input_incomplete = false; | 
| 367 | 53.1M |   bool output_exhausted = false; | 
| 368 |  |  | 
| 369 |  |   /* Avoid copying partial characters (due to limited space). */ | 
| 370 | 53.1M |   const ptrdiff_t bytesAvailable = fromLim - *fromP; | 
| 371 | 53.1M |   const ptrdiff_t bytesStorable = toLim - *toP; | 
| 372 | 53.1M |   UNUSED_P(enc); | 
| 373 | 53.1M |   if (bytesAvailable > bytesStorable) { | 
| 374 | 223k |     fromLim = *fromP + bytesStorable; | 
| 375 | 223k |     output_exhausted = true; | 
| 376 | 223k |   } | 
| 377 |  |  | 
| 378 |  |   /* Avoid copying partial characters (from incomplete input). */ | 
| 379 | 53.1M |   { | 
| 380 | 53.1M |     const char *const fromLimBefore = fromLim; | 
| 381 | 53.1M |     _INTERNAL_trim_to_complete_utf8_characters(*fromP, &fromLim); | 
| 382 | 53.1M |     if (fromLim < fromLimBefore) { | 
| 383 | 3.23k |       input_incomplete = true; | 
| 384 | 3.23k |     } | 
| 385 | 53.1M |   } | 
| 386 |  |  | 
| 387 | 53.1M |   { | 
| 388 | 53.1M |     const ptrdiff_t bytesToCopy = fromLim - *fromP; | 
| 389 | 53.1M |     memcpy(*toP, *fromP, bytesToCopy); | 
| 390 | 53.1M |     *fromP += bytesToCopy; | 
| 391 | 53.1M |     *toP += bytesToCopy; | 
| 392 | 53.1M |   } | 
| 393 |  |  | 
| 394 | 53.1M |   if (output_exhausted) /* needs to go first */ | 
| 395 | 223k |     return XML_CONVERT_OUTPUT_EXHAUSTED; | 
| 396 | 52.9M |   else if (input_incomplete) | 
| 397 | 0 |     return XML_CONVERT_INPUT_INCOMPLETE; | 
| 398 | 52.9M |   else | 
| 399 | 52.9M |     return XML_CONVERT_COMPLETED; | 
| 400 | 53.1M | } | 
| 401 |  |  | 
| 402 |  | static enum XML_Convert_Result PTRCALL | 
| 403 |  | utf8_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim, | 
| 404 | 0 |              unsigned short **toP, const unsigned short *toLim) { | 
| 405 | 0 |   enum XML_Convert_Result res = XML_CONVERT_COMPLETED; | 
| 406 | 0 |   unsigned short *to = *toP; | 
| 407 | 0 |   const char *from = *fromP; | 
| 408 | 0 |   while (from < fromLim && to < toLim) { | 
| 409 | 0 |     switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) { | 
| 410 | 0 |     case BT_LEAD2: | 
| 411 | 0 |       if (fromLim - from < 2) { | 
| 412 | 0 |         res = XML_CONVERT_INPUT_INCOMPLETE; | 
| 413 | 0 |         goto after; | 
| 414 | 0 |       } | 
| 415 | 0 |       *to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f)); | 
| 416 | 0 |       from += 2; | 
| 417 | 0 |       break; | 
| 418 | 0 |     case BT_LEAD3: | 
| 419 | 0 |       if (fromLim - from < 3) { | 
| 420 | 0 |         res = XML_CONVERT_INPUT_INCOMPLETE; | 
| 421 | 0 |         goto after; | 
| 422 | 0 |       } | 
| 423 | 0 |       *to++ = (unsigned short)(((from[0] & 0xf) << 12) | ((from[1] & 0x3f) << 6) | 
| 424 | 0 |                                | (from[2] & 0x3f)); | 
| 425 | 0 |       from += 3; | 
| 426 | 0 |       break; | 
| 427 | 0 |     case BT_LEAD4: { | 
| 428 | 0 |       unsigned long n; | 
| 429 | 0 |       if (toLim - to < 2) { | 
| 430 | 0 |         res = XML_CONVERT_OUTPUT_EXHAUSTED; | 
| 431 | 0 |         goto after; | 
| 432 | 0 |       } | 
| 433 | 0 |       if (fromLim - from < 4) { | 
| 434 | 0 |         res = XML_CONVERT_INPUT_INCOMPLETE; | 
| 435 | 0 |         goto after; | 
| 436 | 0 |       } | 
| 437 | 0 |       n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) | 
| 438 | 0 |           | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f); | 
| 439 | 0 |       n -= 0x10000; | 
| 440 | 0 |       to[0] = (unsigned short)((n >> 10) | 0xD800); | 
| 441 | 0 |       to[1] = (unsigned short)((n & 0x3FF) | 0xDC00); | 
| 442 | 0 |       to += 2; | 
| 443 | 0 |       from += 4; | 
| 444 | 0 |     } break; | 
| 445 | 0 |     default: | 
| 446 | 0 |       *to++ = *from++; | 
| 447 | 0 |       break; | 
| 448 | 0 |     } | 
| 449 | 0 |   } | 
| 450 | 0 |   if (from < fromLim) | 
| 451 | 0 |     res = XML_CONVERT_OUTPUT_EXHAUSTED; | 
| 452 | 0 | after: | 
| 453 | 0 |   *fromP = from; | 
| 454 | 0 |   *toP = to; | 
| 455 | 0 |   return res; | 
| 456 | 0 | } | 
| 457 |  |  | 
/* UTF-8 encoding objects.  All four share the normal_ tokenizer functions
   (VTABLE1), the utf8_toUtf8 / utf8_toUtf16 converters and the utf8_
   predicates; they differ only in the ASCII half of the byte-type table
   (asciitab.h vs. iasciitab.h) and in how BT_COLON is classified.
   The three trailing integers fill the remaining ENCODING members, which are
   declared in xmltok.h (not visible in this file). */

/* Namespace-aware variant: asciitab.h is included with BT_COLON left as is. */
#ifdef XML_NS
static const struct normal_encoding utf8_encoding_ns
    = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0},
       {
#  include "asciitab.h"
#  include "utf8tab.h"
       },
       STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)};
#endif

/* Non-namespace variant: BT_COLON is redefined to BT_NMSTRT while
   asciitab.h is included. */
static const struct normal_encoding utf8_encoding
    = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0},
       {
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
#include "utf8tab.h"
       },
       STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)};

#ifdef XML_NS

/* "Internal" namespace-aware variant: uses iasciitab.h for the ASCII half. */
static const struct normal_encoding internal_utf8_encoding_ns
    = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0},
       {
#  include "iasciitab.h"
#  include "utf8tab.h"
       },
       STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)};

#endif

/* "Internal" non-namespace variant: iasciitab.h with BT_COLON redefined to
   BT_NMSTRT. */
static const struct normal_encoding internal_utf8_encoding
    = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0},
       {
#define BT_COLON BT_NMSTRT
#include "iasciitab.h"
#undef BT_COLON
#include "utf8tab.h"
       },
       STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)};
| 499 |  |  | 
| 500 |  | static enum XML_Convert_Result PTRCALL | 
| 501 |  | latin1_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, | 
| 502 | 156k |               char **toP, const char *toLim) { | 
| 503 | 156k |   UNUSED_P(enc); | 
| 504 | 93.5M |   for (;;) { | 
| 505 | 93.5M |     unsigned char c; | 
| 506 | 93.5M |     if (*fromP == fromLim) | 
| 507 | 145k |       return XML_CONVERT_COMPLETED; | 
| 508 | 93.4M |     c = (unsigned char)**fromP; | 
| 509 | 93.4M |     if (c & 0x80) { | 
| 510 | 90.2M |       if (toLim - *toP < 2) | 
| 511 | 8.38k |         return XML_CONVERT_OUTPUT_EXHAUSTED; | 
| 512 | 90.2M |       *(*toP)++ = (char)((c >> 6) | UTF8_cval2); | 
| 513 | 90.2M |       *(*toP)++ = (char)((c & 0x3f) | 0x80); | 
| 514 | 90.2M |       (*fromP)++; | 
| 515 | 90.2M |     } else { | 
| 516 | 3.21M |       if (*toP == toLim) | 
| 517 | 1.82k |         return XML_CONVERT_OUTPUT_EXHAUSTED; | 
| 518 | 3.21M |       *(*toP)++ = *(*fromP)++; | 
| 519 | 3.21M |     } | 
| 520 | 93.4M |   } | 
| 521 | 156k | } | 
| 522 |  |  | 
| 523 |  | static enum XML_Convert_Result PTRCALL | 
| 524 |  | latin1_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim, | 
| 525 | 0 |                unsigned short **toP, const unsigned short *toLim) { | 
| 526 | 0 |   UNUSED_P(enc); | 
| 527 | 0 |   while (*fromP < fromLim && *toP < toLim) | 
| 528 | 0 |     *(*toP)++ = (unsigned char)*(*fromP)++; | 
| 529 |  | 
 | 
| 530 | 0 |   if ((*toP == toLim) && (*fromP < fromLim)) | 
| 531 | 0 |     return XML_CONVERT_OUTPUT_EXHAUSTED; | 
| 532 | 0 |   else | 
| 533 | 0 |     return XML_CONVERT_COMPLETED; | 
| 534 | 0 | } | 
| 535 |  |  | 
/* Latin-1 encoding objects: normal_ tokenizer functions (VTABLE1), the
   latin1_ converters, a byte-type table built from asciitab.h plus
   latin1tab.h, and NULL in all multi-byte predicate slots (NULL_VTABLE).
   The three trailing integers fill the remaining ENCODING members, which are
   declared in xmltok.h (not visible in this file). */
#ifdef XML_NS

/* Namespace-aware variant: BT_COLON left as defined. */
static const struct normal_encoding latin1_encoding_ns
    = {{VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0},
       {
#  include "asciitab.h"
#  include "latin1tab.h"
       },
       STANDARD_VTABLE(sb_) NULL_VTABLE};

#endif

/* Non-namespace variant: BT_COLON is redefined to BT_NMSTRT while
   asciitab.h is included. */
static const struct normal_encoding latin1_encoding
    = {{VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0},
       {
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
#include "latin1tab.h"
       },
       STANDARD_VTABLE(sb_) NULL_VTABLE};
| 557 |  |  | 
| 558 |  | static enum XML_Convert_Result PTRCALL | 
| 559 |  | ascii_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, | 
| 560 | 949 |              char **toP, const char *toLim) { | 
| 561 | 949 |   UNUSED_P(enc); | 
| 562 | 7.82k |   while (*fromP < fromLim && *toP < toLim) | 
| 563 | 6.87k |     *(*toP)++ = *(*fromP)++; | 
| 564 |  |  | 
| 565 | 949 |   if ((*toP == toLim) && (*fromP < fromLim)) | 
| 566 | 195 |     return XML_CONVERT_OUTPUT_EXHAUSTED; | 
| 567 | 754 |   else | 
| 568 | 754 |     return XML_CONVERT_COMPLETED; | 
| 569 | 949 | } | 
| 570 |  |  | 
/* US-ASCII encoding objects: normal_ tokenizer functions (VTABLE1),
   ascii_toUtf8 together with latin1_toUtf16 as converters, and NULL in all
   multi-byte predicate slots (NULL_VTABLE).  Only the entries supplied by
   asciitab.h are initialized in the byte-type table; the rest of the 256
   entries are zero-initialized, i.e. BT_NONXML as the inline comment notes.
   The three trailing integers fill the remaining ENCODING members, which are
   declared in xmltok.h (not visible in this file). */
#ifdef XML_NS

/* Namespace-aware variant: BT_COLON left as defined. */
static const struct normal_encoding ascii_encoding_ns
    = {{VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0},
       {
#  include "asciitab.h"
           /* BT_NONXML == 0 */
       },
       STANDARD_VTABLE(sb_) NULL_VTABLE};

#endif

/* Non-namespace variant: BT_COLON is redefined to BT_NMSTRT while
   asciitab.h is included. */
static const struct normal_encoding ascii_encoding
    = {{VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0},
       {
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
           /* BT_NONXML == 0 */
       },
       STANDARD_VTABLE(sb_) NULL_VTABLE};
| 592 |  |  | 
| 593 |  | static int PTRFASTCALL | 
| 594 | 537k | unicode_byte_type(char hi, char lo) { | 
| 595 | 537k |   switch ((unsigned char)hi) { | 
| 596 |  |   /* 0xD800-0xDBFF first 16-bit code unit or high surrogate (W1) */ | 
| 597 | 22 |   case 0xD8: | 
| 598 | 41 |   case 0xD9: | 
| 599 | 58 |   case 0xDA: | 
| 600 | 75 |   case 0xDB: | 
| 601 | 75 |     return BT_LEAD4; | 
| 602 |  |   /* 0xDC00-0xDFFF second 16-bit code unit or low surrogate (W2) */ | 
| 603 | 4 |   case 0xDC: | 
| 604 | 9 |   case 0xDD: | 
| 605 | 14 |   case 0xDE: | 
| 606 | 20 |   case 0xDF: | 
| 607 | 20 |     return BT_TRAIL; | 
| 608 | 57 |   case 0xFF: | 
| 609 | 57 |     switch ((unsigned char)lo) { | 
| 610 | 5 |     case 0xFF: /* noncharacter-FFFF */ | 
| 611 | 6 |     case 0xFE: /* noncharacter-FFFE */ | 
| 612 | 6 |       return BT_NONXML; | 
| 613 | 57 |     } | 
| 614 | 51 |     break; | 
| 615 | 537k |   } | 
| 616 | 537k |   return BT_NONASCII; | 
| 617 | 537k | } | 
| 618 |  |  | 
| 619 |  | #define DEFINE_UTF16_TO_UTF8(E)                                                \ | 
| 620 |  |   static enum XML_Convert_Result PTRCALL E##toUtf8(                            \ | 
| 621 |  |       const ENCODING *enc, const char **fromP, const char *fromLim,            \ | 
| 622 | 0 |       char **toP, const char *toLim) {                                         \ | 
| 623 | 0 |     const char *from = *fromP;                                                 \ | 
| 624 | 0 |     UNUSED_P(enc);                                                             \ | 
| 625 | 0 |     fromLim = from + (((fromLim - from) >> 1) << 1); /* shrink to even */      \ | 
| 626 | 0 |     for (; from < fromLim; from += 2) {                                        \ | 
| 627 | 0 |       int plane;                                                               \ | 
| 628 | 0 |       unsigned char lo2;                                                       \ | 
| 629 | 0 |       unsigned char lo = GET_LO(from);                                         \ | 
| 630 | 0 |       unsigned char hi = GET_HI(from);                                         \ | 
| 631 | 0 |       switch (hi) {                                                            \ | 
| 632 | 0 |       case 0:                                                                  \ | 
| 633 | 0 |         if (lo < 0x80) {                                                       \ | 
| 634 | 0 |           if (*toP == toLim) {                                                 \ | 
| 635 | 0 |             *fromP = from;                                                     \ | 
| 636 | 0 |             return XML_CONVERT_OUTPUT_EXHAUSTED;                               \ | 
| 637 | 0 |           }                                                                    \ | 
| 638 | 0 |           *(*toP)++ = lo;                                                      \ | 
| 639 | 0 |           break;                                                               \ | 
| 640 | 0 |         }                                                                      \ | 
| 641 | 0 |         /* fall through */                                                     \ | 
| 642 | 0 |       case 0x1:                                                                \ | 
| 643 | 0 |       case 0x2:                                                                \ | 
| 644 | 0 |       case 0x3:                                                                \ | 
| 645 | 0 |       case 0x4:                                                                \ | 
| 646 | 0 |       case 0x5:                                                                \ | 
| 647 | 0 |       case 0x6:                                                                \ | 
| 648 | 0 |       case 0x7:                                                                \ | 
| 649 | 0 |         if (toLim - *toP < 2) {                                                \ | 
| 650 | 0 |           *fromP = from;                                                       \ | 
| 651 | 0 |           return XML_CONVERT_OUTPUT_EXHAUSTED;                                 \ | 
| 652 | 0 |         }                                                                      \ | 
| 653 | 0 |         *(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2);                      \ | 
| 654 | 0 |         *(*toP)++ = ((lo & 0x3f) | 0x80);                                      \ | 
| 655 | 0 |         break;                                                                 \ | 
| 656 | 0 |       default:                                                                 \ | 
| 657 | 0 |         if (toLim - *toP < 3) {                                                \ | 
| 658 | 0 |           *fromP = from;                                                       \ | 
| 659 | 0 |           return XML_CONVERT_OUTPUT_EXHAUSTED;                                 \ | 
| 660 | 0 |         }                                                                      \ | 
| 661 | 0 |         /* 16 bits divided 4, 6, 6 amongst 3 bytes */                          \ | 
| 662 | 0 |         *(*toP)++ = ((hi >> 4) | UTF8_cval3);                                  \ | 
| 663 | 0 |         *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80);                    \ | 
| 664 | 0 |         *(*toP)++ = ((lo & 0x3f) | 0x80);                                      \ | 
| 665 | 0 |         break;                                                                 \ | 
| 666 | 0 |       case 0xD8:                                                               \ | 
| 667 | 0 |       case 0xD9:                                                               \ | 
| 668 | 0 |       case 0xDA:                                                               \ | 
| 669 | 0 |       case 0xDB:                                                               \ | 
| 670 | 0 |         if (toLim - *toP < 4) {                                                \ | 
| 671 | 0 |           *fromP = from;                                                       \ | 
| 672 | 0 |           return XML_CONVERT_OUTPUT_EXHAUSTED;                                 \ | 
| 673 | 0 |         }                                                                      \ | 
| 674 | 0 |         if (fromLim - from < 4) {                                              \ | 
| 675 | 0 |           *fromP = from;                                                       \ | 
| 676 | 0 |           return XML_CONVERT_INPUT_INCOMPLETE;                                 \ | 
| 677 | 0 |         }                                                                      \ | 
| 678 | 0 |         plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1;                   \ | 
| 679 | 0 |         *(*toP)++ = (char)((plane >> 2) | UTF8_cval4);                         \ | 
| 680 | 0 |         *(*toP)++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80);         \ | 
| 681 | 0 |         from += 2;                                                             \ | 
| 682 | 0 |         lo2 = GET_LO(from);                                                    \ | 
| 683 | 0 |         *(*toP)++ = (((lo & 0x3) << 4) | ((GET_HI(from) & 0x3) << 2)           \ | 
| 684 | 0 |                      | (lo2 >> 6) | 0x80);                                     \ | 
| 685 | 0 |         *(*toP)++ = ((lo2 & 0x3f) | 0x80);                                     \ | 
| 686 | 0 |         break;                                                                 \ | 
| 687 | 0 |       }                                                                        \ | 
| 688 | 0 |     }                                                                          \ | 
| 689 | 0 |     *fromP = from;                                                             \ | 
| 690 | 0 |     if (from < fromLim)                                                        \ | 
| 691 | 0 |       return XML_CONVERT_INPUT_INCOMPLETE;                                     \ | 
| 692 | 0 |     else                                                                       \ | 
| 693 | 0 |       return XML_CONVERT_COMPLETED;                                            \ | 
| 694 | 0 |   } Unexecuted instantiation: xmltok.c:little2_toUtf8Unexecuted instantiation: xmltok.c:big2_toUtf8 | 
| 695 |  |  | 
| 696 |  | #define DEFINE_UTF16_TO_UTF16(E) /* defines E##toUtf16(): repacks 2-byte input units, read with the GET_HI/GET_LO macros in effect at expansion, into 16-bit output units */ \ | 
| 697 |  |   static enum XML_Convert_Result PTRCALL E##toUtf16(                           \ | 
| 698 |  |       const ENCODING *enc, const char **fromP, const char *fromLim,            \ | 
| 699 | 0 |       unsigned short **toP, const unsigned short *toLim) {                     \ | 
| 700 | 0 |     enum XML_Convert_Result res = XML_CONVERT_COMPLETED; /* returned unless the output fills up with input left over */ \ | 
| 701 | 0 |     UNUSED_P(enc);                                                             \ | 
| 702 | 0 |     fromLim = *fromP + (((fromLim - *fromP) >> 1) << 1); /* shrink to even */  \ | 
| 703 | 0 |     /* Avoid copying first half only of surrogate */                           \ | 
| 704 | 0 |     if (fromLim - *fromP > ((toLim - *toP) << 1) /* more input bytes than the output has room for */ \ | 
| 705 | 0 |         && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) { /* high byte of the last input unit is in 0xD8..0xDF */ \ | 
| 706 | 0 |       fromLim -= 2; /* leave that final unit unconverted */                    \ | 
| 707 | 0 |       res = XML_CONVERT_INPUT_INCOMPLETE;                                      \ | 
| 708 | 0 |     }                                                                          \ | 
| 709 | 0 |     for (; *fromP < fromLim && *toP < toLim; *fromP += 2)                      \ | 
| 710 | 0 |       *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); /* one 16-bit unit per iteration; both cursors advance */ \ | 
| 711 | 0 |     if ((*toP == toLim) && (*fromP < fromLim)) /* output full while input remains */ \ | 
| 712 | 0 |       return XML_CONVERT_OUTPUT_EXHAUSTED;                                     \ | 
| 713 | 0 |     else                                                                       \ | 
| 714 | 0 |       return res;                                                              \ | 
| 715 | 0 |   } Unexecuted instantiation: xmltok.c:little2_toUtf16Unexecuted instantiation: xmltok.c:big2_toUtf16 | 
| 716 |  |  | 
| 717 |  | #define SET2(ptr, ch) (((ptr)[0] = ((ch)&0xff)), ((ptr)[1] = ((ch) >> 8))) /* little-endian layout: low byte stored at offset 0 */ | 
| 718 | 0 | #define GET_LO(ptr) ((unsigned char)(ptr)[0]) /* low byte of a little-endian unit */ | 
| 719 | 0 | #define GET_HI(ptr) ((unsigned char)(ptr)[1]) /* high byte of a little-endian unit */ | 
| 720 |  |  | 
| 721 |  | DEFINE_UTF16_TO_UTF8(little2_) /* expands to little2_toUtf8 using the accessors above */ | 
| 722 |  | DEFINE_UTF16_TO_UTF16(little2_) /* expands to little2_toUtf16 using the accessors above */ | 
| 723 |  |  | 
| 724 |  | #undef SET2 /* accessors are redefined below with the byte order swapped */ | 
| 725 |  | #undef GET_LO | 
| 726 |  | #undef GET_HI | 
| 727 |  |  | 
| 728 |  | #define SET2(ptr, ch) (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch)&0xFF))) /* big-endian layout: high byte stored at offset 0 */ | 
| 729 | 0 | #define GET_LO(ptr) ((unsigned char)(ptr)[1]) /* low byte of a big-endian unit */ | 
| 730 | 0 | #define GET_HI(ptr) ((unsigned char)(ptr)[0]) /* high byte of a big-endian unit */ | 
| 731 |  |  | 
| 732 |  | DEFINE_UTF16_TO_UTF8(big2_) /* expands to big2_toUtf8 */ | 
| 733 |  | DEFINE_UTF16_TO_UTF16(big2_) /* expands to big2_toUtf16 */ | 
| 734 |  |  | 
| 735 |  | #undef SET2 /* the accessors are only needed while the converters are being instantiated */ | 
| 736 |  | #undef GET_LO | 
| 737 |  | #undef GET_HI | 
| 738 |  |  | 
| 739 |  | #define LITTLE2_BYTE_TYPE(enc, p) /* byte type of the 2-byte little-endian unit at p */ \ | 
| 740 | 379k |   ((p)[1] == 0 ? ((struct normal_encoding *)(enc))->type[(unsigned char)*(p)] /* high byte 0: look the low byte up in the encoding's type table */ \ | 
| 741 | 379k |                : unicode_byte_type((p)[1], (p)[0])) /* otherwise classify from (high, low) */ | 
| 742 | 0 | #define LITTLE2_BYTE_TO_ASCII(p) ((p)[1] == 0 ? (p)[0] : -1) /* low byte when the high byte is 0, else -1 */ | 
| 743 | 0 | #define LITTLE2_CHAR_MATCHES(p, c) ((p)[1] == 0 && (p)[0] == c) /* true when the unit is exactly c with a zero high byte */ | 
| 744 |  | #define LITTLE2_IS_NAME_CHAR_MINBPC(p) /* namePages lookup with (high, low) = (p[1], p[0]) */ \ | 
| 745 | 189k |   UCS2_GET_NAMING(namePages, (unsigned char)p[1], (unsigned char)p[0]) | 
| 746 |  | #define LITTLE2_IS_NMSTRT_CHAR_MINBPC(p) /* nmstrtPages lookup with (high, low) = (p[1], p[0]) */ \ | 
| 747 | 121 |   UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[1], (unsigned char)p[0]) | 
| 748 |  |  | 
| 749 |  | #ifdef XML_MIN_SIZE /* XML_MIN_SIZE build: the LITTLE2_* macros are wrapped in out-of-line functions */ | 
| 750 |  |  | 
| 751 |  | static int PTRFASTCALL | 
| 752 |  | little2_byteType(const ENCODING *enc, const char *p) { /* function form of LITTLE2_BYTE_TYPE */ | 
| 753 |  |   return LITTLE2_BYTE_TYPE(enc, p); | 
| 754 |  | } | 
| 755 |  |  | 
| 756 |  | static int PTRFASTCALL | 
| 757 |  | little2_byteToAscii(const ENCODING *enc, const char *p) { /* function form of LITTLE2_BYTE_TO_ASCII; enc is unused */ | 
| 758 |  |   UNUSED_P(enc); | 
| 759 |  |   return LITTLE2_BYTE_TO_ASCII(p); | 
| 760 |  | } | 
| 761 |  |  | 
| 762 |  | static int PTRCALL | 
| 763 |  | little2_charMatches(const ENCODING *enc, const char *p, int c) { /* function form of LITTLE2_CHAR_MATCHES; enc is unused */ | 
| 764 |  |   UNUSED_P(enc); | 
| 765 |  |   return LITTLE2_CHAR_MATCHES(p, c); | 
| 766 |  | } | 
| 767 |  |  | 
| 768 |  | static int PTRFASTCALL | 
| 769 |  | little2_isNameMin(const ENCODING *enc, const char *p) { /* function form of LITTLE2_IS_NAME_CHAR_MINBPC; enc is unused */ | 
| 770 |  |   UNUSED_P(enc); | 
| 771 |  |   return LITTLE2_IS_NAME_CHAR_MINBPC(p); | 
| 772 |  | } | 
| 773 |  |  | 
| 774 |  | static int PTRFASTCALL | 
| 775 |  | little2_isNmstrtMin(const ENCODING *enc, const char *p) { /* function form of LITTLE2_IS_NMSTRT_CHAR_MINBPC; enc is unused */ | 
| 776 |  |   UNUSED_P(enc); | 
| 777 |  |   return LITTLE2_IS_NMSTRT_CHAR_MINBPC(p); | 
| 778 |  | } | 
| 779 |  |  | 
| 780 |  | #  undef VTABLE | 
| 781 |  | #  define VTABLE VTABLE1, little2_toUtf8, little2_toUtf16 /* VTABLE1 followed by the two little2 converters */ | 
| 782 |  |  | 
| 783 |  | #else /* not XML_MIN_SIZE */ | 
| 784 |  |  | 
| 785 |  | #  undef PREFIX | 
| 786 | 0 | #  define PREFIX(ident) little2_##ident /* names used through PREFIX() get the little2_ prefix */ | 
| 787 | 759k | #  define MINBPC(enc) 2 /* every unit is two bytes */ | 
| 788 |  | /* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */ | 
| 789 | 379k | #  define BYTE_TYPE(enc, p) LITTLE2_BYTE_TYPE(enc, p) | 
| 790 | 0 | #  define BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(p) | 
| 791 | 0 | #  define CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(p, c) | 
| 792 | 5 | #  define IS_NAME_CHAR(enc, p, n) 0 /* the n-byte form never matches for this encoding */ | 
| 793 | 189k | #  define IS_NAME_CHAR_MINBPC(enc, p) LITTLE2_IS_NAME_CHAR_MINBPC(p) | 
| 794 | 5 | #  define IS_NMSTRT_CHAR(enc, p, n) (0) /* the n-byte form never matches for this encoding */ | 
| 795 | 121 | #  define IS_NMSTRT_CHAR_MINBPC(enc, p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(p) | 
| 796 |  |  | 
| 797 |  | #  define XML_TOK_IMPL_C /* defined only around the include below */ | 
| 798 |  | #  include "xmltok_impl.c" /* compiled with the macro bindings above; presumably yields the little2_-prefixed tokenizer functions - confirm in xmltok_impl.c */ | 
| 799 |  | #  undef XML_TOK_IMPL_C | 
| 800 |  |  | 
| 801 |  | #  undef MINBPC /* drop the per-encoding bindings so they can be redefined later */ | 
| 802 |  | #  undef BYTE_TYPE | 
| 803 |  | #  undef BYTE_TO_ASCII | 
| 804 |  | #  undef CHAR_MATCHES | 
| 805 |  | #  undef IS_NAME_CHAR | 
| 806 |  | #  undef IS_NAME_CHAR_MINBPC | 
| 807 |  | #  undef IS_NMSTRT_CHAR | 
| 808 |  | #  undef IS_NMSTRT_CHAR_MINBPC | 
| 809 |  | #  undef IS_INVALID_CHAR | 
| 810 |  |  | 
| 811 |  | #endif /* not XML_MIN_SIZE */ | 
| 812 |  |  | 
| 813 |  | #ifdef XML_NS /* this variant exists only when XML_NS is defined */ | 
| 814 |  |  | 
| 815 |  | static const struct normal_encoding little2_encoding_ns /* little-endian 2-byte encoding; asciitab.h included without the BT_COLON override */ | 
| 816 |  |     = {{VTABLE, 2, 0, /* NOTE(review): 2 and 0 are presumably minBytesPerChar and a UTF-8 flag - confirm against struct encoding */ | 
| 817 |  | #  if BYTEORDER == 1234 | 
| 818 |  |         1 /* flag is 1 when BYTEORDER is 1234 */ | 
| 819 |  | #  else | 
| 820 |  |         0 | 
| 821 |  | #  endif | 
| 822 |  |        }, | 
| 823 |  |        { /* initializer assembled from the two included tables; presumably the type[] member read by LITTLE2_BYTE_TYPE - confirm */ | 
| 824 |  | #  include "asciitab.h" | 
| 825 |  | #  include "latin1tab.h" | 
| 826 |  |        }, | 
| 827 |  |        STANDARD_VTABLE(little2_) NULL_VTABLE}; | 
| 828 |  |  | 
| 829 |  | #endif | 
| 830 |  |  | 
| 831 |  | static const struct normal_encoding little2_encoding /* same layout as the _ns variant, but with BT_COLON mapped to BT_NMSTRT */ | 
| 832 |  |     = {{VTABLE, 2, 0, | 
| 833 |  | #if BYTEORDER == 1234 | 
| 834 |  |         1 | 
| 835 |  | #else | 
| 836 |  |         0 | 
| 837 |  | #endif | 
| 838 |  |        }, | 
| 839 |  |        { | 
| 840 |  | #define BT_COLON BT_NMSTRT /* any BT_COLON entry in asciitab.h becomes BT_NMSTRT for this table */ | 
| 841 |  | #include "asciitab.h" | 
| 842 |  | #undef BT_COLON | 
| 843 |  | #include "latin1tab.h" | 
| 844 |  |        }, | 
| 845 |  |        STANDARD_VTABLE(little2_) NULL_VTABLE}; | 
| 846 |  |  | 
| 847 |  | #if BYTEORDER != 4321 /* the internal_ variants are compiled unless BYTEORDER is 4321 */ | 
| 848 |  |  | 
| 849 |  | #  ifdef XML_NS | 
| 850 |  |  | 
| 851 |  | static const struct normal_encoding internal_little2_encoding_ns /* built from iasciitab.h instead of asciitab.h; last flag fixed at 1 */ | 
| 852 |  |     = {{VTABLE, 2, 0, 1}, | 
| 853 |  |        { | 
| 854 |  | #    include "iasciitab.h" | 
| 855 |  | #    include "latin1tab.h" | 
| 856 |  |        }, | 
| 857 |  |        STANDARD_VTABLE(little2_) NULL_VTABLE}; | 
| 858 |  |  | 
| 859 |  | #  endif | 
| 860 |  |  | 
| 861 |  | static const struct normal_encoding internal_little2_encoding /* iasciitab.h with BT_COLON mapped to BT_NMSTRT; last flag fixed at 1 */ | 
| 862 |  |     = {{VTABLE, 2, 0, 1}, | 
| 863 |  |        { | 
| 864 |  | #  define BT_COLON BT_NMSTRT | 
| 865 |  | #  include "iasciitab.h" | 
| 866 |  | #  undef BT_COLON | 
| 867 |  | #  include "latin1tab.h" | 
| 868 |  |        }, | 
| 869 |  |        STANDARD_VTABLE(little2_) NULL_VTABLE}; | 
| 870 |  |  | 
| 871 |  | #endif | 
| 872 |  |  | 
| 873 |  | #define BIG2_BYTE_TYPE(enc, p) /* byte type of the 2-byte big-endian unit at p */ \ | 
| 874 | 158k |   ((p)[0] == 0 /* high byte is at offset 0 */                                  \ | 
| 875 | 158k |        ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] /* high byte 0: look the low byte up in the encoding's type table */ \ | 
| 876 | 158k |        : unicode_byte_type((p)[0], (p)[1])) /* otherwise classify from (high, low) */ | 
| 877 | 0 | #define BIG2_BYTE_TO_ASCII(p) ((p)[0] == 0 ? (p)[1] : -1) /* low byte when the high byte is 0, else -1 */ | 
| 878 | 0 | #define BIG2_CHAR_MATCHES(p, c) ((p)[0] == 0 && (p)[1] == c) /* true when the unit is exactly c with a zero high byte */ | 
| 879 |  | #define BIG2_IS_NAME_CHAR_MINBPC(p) /* namePages lookup with (high, low) = (p[0], p[1]) */ \ | 
| 880 | 79.3k |   UCS2_GET_NAMING(namePages, (unsigned char)p[0], (unsigned char)p[1]) | 
| 881 |  | #define BIG2_IS_NMSTRT_CHAR_MINBPC(p) /* nmstrtPages lookup with (high, low) = (p[0], p[1]) */ \ | 
| 882 | 92 |   UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[0], (unsigned char)p[1]) | 
| 883 |  |  | 
| 884 |  | #ifdef XML_MIN_SIZE /* XML_MIN_SIZE build: the BIG2_* macros are wrapped in out-of-line functions */ | 
| 885 |  |  | 
| 886 |  | static int PTRFASTCALL | 
| 887 |  | big2_byteType(const ENCODING *enc, const char *p) { /* function form of BIG2_BYTE_TYPE */ | 
| 888 |  |   return BIG2_BYTE_TYPE(enc, p); | 
| 889 |  | } | 
| 890 |  |  | 
| 891 |  | static int PTRFASTCALL | 
| 892 |  | big2_byteToAscii(const ENCODING *enc, const char *p) { /* function form of BIG2_BYTE_TO_ASCII; enc is unused */ | 
| 893 |  |   UNUSED_P(enc); | 
| 894 |  |   return BIG2_BYTE_TO_ASCII(p); | 
| 895 |  | } | 
| 896 |  |  | 
| 897 |  | static int PTRCALL | 
| 898 |  | big2_charMatches(const ENCODING *enc, const char *p, int c) { /* function form of BIG2_CHAR_MATCHES; enc is unused */ | 
| 899 |  |   UNUSED_P(enc); | 
| 900 |  |   return BIG2_CHAR_MATCHES(p, c); | 
| 901 |  | } | 
| 902 |  |  | 
| 903 |  | static int PTRFASTCALL | 
| 904 |  | big2_isNameMin(const ENCODING *enc, const char *p) { /* function form of BIG2_IS_NAME_CHAR_MINBPC; enc is unused */ | 
| 905 |  |   UNUSED_P(enc); | 
| 906 |  |   return BIG2_IS_NAME_CHAR_MINBPC(p); | 
| 907 |  | } | 
| 908 |  |  | 
| 909 |  | static int PTRFASTCALL | 
| 910 |  | big2_isNmstrtMin(const ENCODING *enc, const char *p) { /* function form of BIG2_IS_NMSTRT_CHAR_MINBPC; enc is unused */ | 
| 911 |  |   UNUSED_P(enc); | 
| 912 |  |   return BIG2_IS_NMSTRT_CHAR_MINBPC(p); | 
| 913 |  | } | 
| 914 |  |  | 
| 915 |  | #  undef VTABLE | 
| 916 |  | #  define VTABLE VTABLE1, big2_toUtf8, big2_toUtf16 /* VTABLE1 followed by the two big2 converters */ | 
| 917 |  |  | 
| 918 |  | #else /* not XML_MIN_SIZE */ | 
| 919 |  |  | 
| 920 |  | #  undef PREFIX | 
| 921 | 0 | #  define PREFIX(ident) big2_##ident /* names used through PREFIX() get the big2_ prefix */ | 
| 922 | 317k | #  define MINBPC(enc) 2 /* every unit is two bytes */ | 
| 923 |  | /* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */ | 
| 924 | 158k | #  define BYTE_TYPE(enc, p) BIG2_BYTE_TYPE(enc, p) | 
| 925 | 0 | #  define BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(p) | 
| 926 | 0 | #  define CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(p, c) | 
| 927 | 16 | #  define IS_NAME_CHAR(enc, p, n) 0 /* the n-byte form never matches for this encoding */ | 
| 928 | 79.3k | #  define IS_NAME_CHAR_MINBPC(enc, p) BIG2_IS_NAME_CHAR_MINBPC(p) | 
| 929 | 16 | #  define IS_NMSTRT_CHAR(enc, p, n) (0) /* the n-byte form never matches for this encoding */ | 
| 930 | 92 | #  define IS_NMSTRT_CHAR_MINBPC(enc, p) BIG2_IS_NMSTRT_CHAR_MINBPC(p) | 
| 931 |  |  | 
| 932 |  | #  define XML_TOK_IMPL_C /* defined only around the include below */ | 
| 933 |  | #  include "xmltok_impl.c" /* compiled with the macro bindings above; presumably yields the big2_-prefixed tokenizer functions - confirm in xmltok_impl.c */ | 
| 934 |  | #  undef XML_TOK_IMPL_C | 
| 935 |  |  | 
| 936 |  | #  undef MINBPC /* drop the per-encoding bindings once the include is done */ | 
| 937 |  | #  undef BYTE_TYPE | 
| 938 |  | #  undef BYTE_TO_ASCII | 
| 939 |  | #  undef CHAR_MATCHES | 
| 940 |  | #  undef IS_NAME_CHAR | 
| 941 |  | #  undef IS_NAME_CHAR_MINBPC | 
| 942 |  | #  undef IS_NMSTRT_CHAR | 
| 943 |  | #  undef IS_NMSTRT_CHAR_MINBPC | 
| 944 |  | #  undef IS_INVALID_CHAR | 
| 945 |  |  | 
| 946 |  | #endif /* not XML_MIN_SIZE */ | 
| 947 |  |  | 
| 948 |  | #ifdef XML_NS /* this variant exists only when XML_NS is defined */ | 
| 949 |  |  | 
| 950 |  | static const struct normal_encoding big2_encoding_ns /* big-endian 2-byte encoding; asciitab.h included without the BT_COLON override */ | 
| 951 |  |     = {{VTABLE, 2, 0, /* NOTE(review): 2 and 0 are presumably minBytesPerChar and a UTF-8 flag - confirm against struct encoding */ | 
| 952 |  | #  if BYTEORDER == 4321 | 
| 953 |  |         1 /* flag is 1 when BYTEORDER is 4321 */ | 
| 954 |  | #  else | 
| 955 |  |         0 | 
| 956 |  | #  endif | 
| 957 |  |        }, | 
| 958 |  |        { /* initializer assembled from the two included tables; presumably the type[] member read by BIG2_BYTE_TYPE - confirm */ | 
| 959 |  | #  include "asciitab.h" | 
| 960 |  | #  include "latin1tab.h" | 
| 961 |  |        }, | 
| 962 |  |        STANDARD_VTABLE(big2_) NULL_VTABLE}; | 
| 963 |  |  | 
| 964 |  | #endif | 
| 965 |  |  | 
| 966 |  | static const struct normal_encoding big2_encoding /* same layout as the _ns variant, but with BT_COLON mapped to BT_NMSTRT */ | 
| 967 |  |     = {{VTABLE, 2, 0, | 
| 968 |  | #if BYTEORDER == 4321 | 
| 969 |  |         1 | 
| 970 |  | #else | 
| 971 |  |         0 | 
| 972 |  | #endif | 
| 973 |  |        }, | 
| 974 |  |        { | 
| 975 |  | #define BT_COLON BT_NMSTRT /* any BT_COLON entry in asciitab.h becomes BT_NMSTRT for this table */ | 
| 976 |  | #include "asciitab.h" | 
| 977 |  | #undef BT_COLON | 
| 978 |  | #include "latin1tab.h" | 
| 979 |  |        }, | 
| 980 |  |        STANDARD_VTABLE(big2_) NULL_VTABLE}; | 
| 981 |  |  | 
| 982 |  | #if BYTEORDER != 1234 /* the internal_ variants are compiled unless BYTEORDER is 1234 */ | 
| 983 |  |  | 
| 984 |  | #  ifdef XML_NS | 
| 985 |  |  | 
| 986 |  | static const struct normal_encoding internal_big2_encoding_ns /* built from iasciitab.h instead of asciitab.h; last flag fixed at 1 */ | 
| 987 |  |     = {{VTABLE, 2, 0, 1}, | 
| 988 |  |        { | 
| 989 |  | #    include "iasciitab.h" | 
| 990 |  | #    include "latin1tab.h" | 
| 991 |  |        }, | 
| 992 |  |        STANDARD_VTABLE(big2_) NULL_VTABLE}; | 
| 993 |  |  | 
| 994 |  | #  endif | 
| 995 |  |  | 
| 996 |  | static const struct normal_encoding internal_big2_encoding /* iasciitab.h with BT_COLON mapped to BT_NMSTRT; last flag fixed at 1 */ | 
| 997 |  |     = {{VTABLE, 2, 0, 1}, | 
| 998 |  |        { | 
| 999 |  | #  define BT_COLON BT_NMSTRT | 
| 1000 |  | #  include "iasciitab.h" | 
| 1001 |  | #  undef BT_COLON | 
| 1002 |  | #  include "latin1tab.h" | 
| 1003 |  |        }, | 
| 1004 |  |        STANDARD_VTABLE(big2_) NULL_VTABLE}; | 
| 1005 |  |  | 
| 1006 |  | #endif | 
| 1007 |  |  | 
| 1008 |  | #undef PREFIX /* PREFIX is not needed past the per-encoding sections */ | 
| 1009 |  |  | 
/* Compare two NUL-terminated strings, treating ASCII a-z as equal to A-Z.
 *
 * Each character from either string is folded to upper case before the
 * comparison.  Returns 1 when both strings match up to and including the
 * terminator, 0 at the first difference.
 */
static int FASTCALL
streqci(const char *s1, const char *s2) {
  char lhs;
  char rhs;
  do {
    lhs = *s1++;
    rhs = *s2++;
    if (lhs >= ASCII_a && lhs <= ASCII_z)
      lhs += ASCII_A - ASCII_a;
    if (rhs >= ASCII_a && rhs <= ASCII_z)
      /* Never reached in practice: both call sites pass an upper-case
       * string as s2, so coverage tooling is told to skip this line.
       */
      rhs += ASCII_A - ASCII_a; /* LCOV_EXCL_LINE */
    if (lhs != rhs)
      return 0;
  } while (lhs); /* matching terminators end the scan */
  return 1;
}
| 1030 |  |  | 
| 1031 |  | static void PTRCALL | 
| 1032 |  | initUpdatePosition(const ENCODING *enc, const char *ptr, const char *end, | 
| 1033 | 27 |                    POSITION *pos) { | 
| 1034 | 27 |   UNUSED_P(enc); | 
| 1035 | 27 |   normal_updatePosition(&utf8_encoding.enc, ptr, end, pos); | 
| 1036 | 27 | } | 
| 1037 |  |  | 
| 1038 |  | static int | 
| 1039 | 215k | toAscii(const ENCODING *enc, const char *ptr, const char *end) { | 
| 1040 | 215k |   char buf[1]; | 
| 1041 | 215k |   char *p = buf; | 
| 1042 | 215k |   XmlUtf8Convert(enc, &ptr, end, &p, p + 1); | 
| 1043 | 215k |   if (p == buf) | 
| 1044 | 566 |     return -1; | 
| 1045 | 214k |   else | 
| 1046 | 214k |     return buf[0]; | 
| 1047 | 215k | } | 
| 1048 |  |  | 
/* Return 1 when c is space (0x20), CR (0xD), LF (0xA) or tab (0x9);
 * return 0 for every other value.
 */
static int FASTCALL
isSpace(int c) {
  return c == 0x20 || c == 0xD || c == 0xA || c == 0x9;
}
| 1060 |  |  | 
| 1061 |  | /* Return 1 if there's just optional white space or there's an S | 
| 1062 |  |    followed by name=val. | 
| 1063 |  | */ | 
| 1064 |  | static int | 
| 1065 |  | parsePseudoAttribute(const ENCODING *enc, const char *ptr, const char *end, | 
| 1066 |  |                      const char **namePtr, const char **nameEndPtr, | 
| 1067 | 4.83k |                      const char **valPtr, const char **nextTokPtr) { | 
| 1068 | 4.83k |   int c; | 
| 1069 | 4.83k |   char open; | 
| 1070 | 4.83k |   if (ptr == end) { | 
| 1071 | 1.16k |     *namePtr = NULL; | 
| 1072 | 1.16k |     return 1; | 
| 1073 | 1.16k |   } | 
| 1074 | 3.66k |   if (! isSpace(toAscii(enc, ptr, end))) { | 
| 1075 | 8 |     *nextTokPtr = ptr; | 
| 1076 | 8 |     return 0; | 
| 1077 | 8 |   } | 
| 1078 | 4.61k |   do { | 
| 1079 | 4.61k |     ptr += enc->minBytesPerChar; | 
| 1080 | 4.61k |   } while (isSpace(toAscii(enc, ptr, end))); | 
| 1081 | 3.65k |   if (ptr == end) { | 
| 1082 | 349 |     *namePtr = NULL; | 
| 1083 | 349 |     return 1; | 
| 1084 | 349 |   } | 
| 1085 | 3.31k |   *namePtr = ptr; | 
| 1086 | 27.2k |   for (;;) { | 
| 1087 | 27.2k |     c = toAscii(enc, ptr, end); | 
| 1088 | 27.2k |     if (c == -1) { | 
| 1089 | 15 |       *nextTokPtr = ptr; | 
| 1090 | 15 |       return 0; | 
| 1091 | 15 |     } | 
| 1092 | 27.2k |     if (c == ASCII_EQUALS) { | 
| 1093 | 3.22k |       *nameEndPtr = ptr; | 
| 1094 | 3.22k |       break; | 
| 1095 | 3.22k |     } | 
| 1096 | 24.0k |     if (isSpace(c)) { | 
| 1097 | 75 |       *nameEndPtr = ptr; | 
| 1098 | 3.40k |       do { | 
| 1099 | 3.40k |         ptr += enc->minBytesPerChar; | 
| 1100 | 3.40k |       } while (isSpace(c = toAscii(enc, ptr, end))); | 
| 1101 | 75 |       if (c != ASCII_EQUALS) { | 
| 1102 | 54 |         *nextTokPtr = ptr; | 
| 1103 | 54 |         return 0; | 
| 1104 | 54 |       } | 
| 1105 | 21 |       break; | 
| 1106 | 75 |     } | 
| 1107 | 23.9k |     ptr += enc->minBytesPerChar; | 
| 1108 | 23.9k |   } | 
| 1109 | 3.24k |   if (ptr == *namePtr) { | 
| 1110 | 2 |     *nextTokPtr = ptr; | 
| 1111 | 2 |     return 0; | 
| 1112 | 2 |   } | 
| 1113 | 3.23k |   ptr += enc->minBytesPerChar; | 
| 1114 | 3.23k |   c = toAscii(enc, ptr, end); | 
| 1115 | 4.19k |   while (isSpace(c)) { | 
| 1116 | 956 |     ptr += enc->minBytesPerChar; | 
| 1117 | 956 |     c = toAscii(enc, ptr, end); | 
| 1118 | 956 |   } | 
| 1119 | 3.23k |   if (c != ASCII_QUOT && c != ASCII_APOS) { | 
| 1120 | 44 |     *nextTokPtr = ptr; | 
| 1121 | 44 |     return 0; | 
| 1122 | 44 |   } | 
| 1123 | 3.19k |   open = (char)c; | 
| 1124 | 3.19k |   ptr += enc->minBytesPerChar; | 
| 1125 | 3.19k |   *valPtr = ptr; | 
| 1126 | 169k |   for (;; ptr += enc->minBytesPerChar) { | 
| 1127 | 169k |     c = toAscii(enc, ptr, end); | 
| 1128 | 169k |     if (c == open) | 
| 1129 | 3.12k |       break; | 
| 1130 | 166k |     if (! (ASCII_a <= c && c <= ASCII_z) && ! (ASCII_A <= c && c <= ASCII_Z) | 
| 1131 | 166k |         && ! (ASCII_0 <= c && c <= ASCII_9) && c != ASCII_PERIOD | 
| 1132 | 166k |         && c != ASCII_MINUS && c != ASCII_UNDERSCORE) { | 
| 1133 | 70 |       *nextTokPtr = ptr; | 
| 1134 | 70 |       return 0; | 
| 1135 | 70 |     } | 
| 1136 | 166k |   } | 
| 1137 | 3.12k |   *nextTokPtr = ptr + enc->minBytesPerChar; | 
| 1138 | 3.12k |   return 1; | 
| 1139 | 3.19k | } | 
| 1140 |  |  | 
| 1141 |  | static const char KW_version[] /* NUL-terminated keyword; spells "version" going by the ASCII_* macro names */ | 
| 1142 |  |     = {ASCII_v, ASCII_e, ASCII_r, ASCII_s, ASCII_i, ASCII_o, ASCII_n, '\0'}; | 
| 1143 |  |  | 
| 1144 |  | static const char KW_encoding[] = {ASCII_e, ASCII_n, ASCII_c, ASCII_o, ASCII_d, /* spells "encoding" going by the macro names */ | 
| 1145 |  |                                    ASCII_i, ASCII_n, ASCII_g, '\0'}; | 
| 1146 |  |  | 
| 1147 |  | static const char KW_standalone[] /* spells "standalone" going by the macro names */ | 
| 1148 |  |     = {ASCII_s, ASCII_t, ASCII_a, ASCII_n, ASCII_d, ASCII_a, | 
| 1149 |  |        ASCII_l, ASCII_o, ASCII_n, ASCII_e, '\0'}; | 
| 1150 |  |  | 
| 1151 |  | static const char KW_yes[] = {ASCII_y, ASCII_e, ASCII_s, '\0'}; /* accepted value of the standalone attribute (sets *standalone to 1) */ | 
| 1152 |  |  | 
| 1153 |  | static const char KW_no[] = {ASCII_n, ASCII_o, '\0'}; /* accepted value of the standalone attribute (sets *standalone to 0) */ | 
| 1154 |  |  | 
| 1155 |  | static int | 
| 1156 |  | doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, const char *, | 
| 1157 |  |                                                  const char *), | 
| 1158 |  |                int isGeneralTextEntity, const ENCODING *enc, const char *ptr, | 
| 1159 |  |                const char *end, const char **badPtr, const char **versionPtr, | 
| 1160 |  |                const char **versionEndPtr, const char **encodingName, | 
| 1161 | 1.78k |                const ENCODING **encoding, int *standalone) { | 
| 1162 | 1.78k |   const char *val = NULL; | 
| 1163 | 1.78k |   const char *name = NULL; | 
| 1164 | 1.78k |   const char *nameEnd = NULL; | 
| 1165 | 1.78k |   ptr += 5 * enc->minBytesPerChar; | 
| 1166 | 1.78k |   end -= 2 * enc->minBytesPerChar; | 
| 1167 | 1.78k |   if (! parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr) | 
| 1168 | 1.78k |       || ! name) { | 
| 1169 | 212 |     *badPtr = ptr; | 
| 1170 | 212 |     return 0; | 
| 1171 | 212 |   } | 
| 1172 | 1.57k |   if (! XmlNameMatchesAscii(enc, name, nameEnd, KW_version)) { | 
| 1173 | 3 |     if (! isGeneralTextEntity) { | 
| 1174 | 3 |       *badPtr = name; | 
| 1175 | 3 |       return 0; | 
| 1176 | 3 |     } | 
| 1177 | 1.57k |   } else { | 
| 1178 | 1.57k |     if (versionPtr) | 
| 1179 | 1.57k |       *versionPtr = val; | 
| 1180 | 1.57k |     if (versionEndPtr) | 
| 1181 | 1.57k |       *versionEndPtr = ptr; | 
| 1182 | 1.57k |     if (! parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) { | 
| 1183 | 10 |       *badPtr = ptr; | 
| 1184 | 10 |       return 0; | 
| 1185 | 10 |     } | 
| 1186 | 1.56k |     if (! name) { | 
| 1187 | 13 |       if (isGeneralTextEntity) { | 
| 1188 |  |         /* a TextDecl must have an EncodingDecl */ | 
| 1189 | 0 |         *badPtr = ptr; | 
| 1190 | 0 |         return 0; | 
| 1191 | 0 |       } | 
| 1192 | 13 |       return 1; | 
| 1193 | 13 |     } | 
| 1194 | 1.56k |   } | 
| 1195 | 1.54k |   if (XmlNameMatchesAscii(enc, name, nameEnd, KW_encoding)) { | 
| 1196 | 1.47k |     int c = toAscii(enc, val, end); | 
| 1197 | 1.47k |     if (! (ASCII_a <= c && c <= ASCII_z) && ! (ASCII_A <= c && c <= ASCII_Z)) { | 
| 1198 | 3 |       *badPtr = val; | 
| 1199 | 3 |       return 0; | 
| 1200 | 3 |     } | 
| 1201 | 1.47k |     if (encodingName) | 
| 1202 | 1.47k |       *encodingName = val; | 
| 1203 | 1.47k |     if (encoding) | 
| 1204 | 1.47k |       *encoding = encodingFinder(enc, val, ptr - enc->minBytesPerChar); | 
| 1205 | 1.47k |     if (! parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) { | 
| 1206 | 11 |       *badPtr = ptr; | 
| 1207 | 11 |       return 0; | 
| 1208 | 11 |     } | 
| 1209 | 1.46k |     if (! name) | 
| 1210 | 1.45k |       return 1; | 
| 1211 | 1.46k |   } | 
| 1212 | 76 |   if (! XmlNameMatchesAscii(enc, name, nameEnd, KW_standalone) | 
| 1213 | 76 |       || isGeneralTextEntity) { | 
| 1214 | 3 |     *badPtr = name; | 
| 1215 | 3 |     return 0; | 
| 1216 | 3 |   } | 
| 1217 | 73 |   if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_yes)) { | 
| 1218 | 34 |     if (standalone) | 
| 1219 | 34 |       *standalone = 1; | 
| 1220 | 39 |   } else if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_no)) { | 
| 1221 | 38 |     if (standalone) | 
| 1222 | 38 |       *standalone = 0; | 
| 1223 | 38 |   } else { | 
| 1224 | 1 |     *badPtr = val; | 
| 1225 | 1 |     return 0; | 
| 1226 | 1 |   } | 
| 1227 | 1.13k |   while (isSpace(toAscii(enc, ptr, end))) | 
| 1228 | 1.06k |     ptr += enc->minBytesPerChar; | 
| 1229 | 72 |   if (ptr != end) { | 
| 1230 | 2 |     *badPtr = ptr; | 
| 1231 | 2 |     return 0; | 
| 1232 | 2 |   } | 
| 1233 | 70 |   return 1; | 
| 1234 | 72 | } | 
| 1235 |  |  | 
| 1236 |  | static int FASTCALL | 
| 1237 | 64.6k | checkCharRefNumber(int result) { | 
| 1238 | 64.6k |   switch (result >> 8) { | 
| 1239 | 1 |   case 0xD8: | 
| 1240 | 2 |   case 0xD9: | 
| 1241 | 3 |   case 0xDA: | 
| 1242 | 4 |   case 0xDB: | 
| 1243 | 5 |   case 0xDC: | 
| 1244 | 6 |   case 0xDD: | 
| 1245 | 8 |   case 0xDE: | 
| 1246 | 9 |   case 0xDF: | 
| 1247 | 9 |     return -1; | 
| 1248 | 8.86k |   case 0: | 
| 1249 | 8.86k |     if (latin1_encoding.type[result] == BT_NONXML) | 
| 1250 | 25 |       return -1; | 
| 1251 | 8.83k |     break; | 
| 1252 | 8.83k |   case 0xFF: | 
| 1253 | 977 |     if (result == 0xFFFE || result == 0xFFFF) | 
| 1254 | 4 |       return -1; | 
| 1255 | 973 |     break; | 
| 1256 | 64.6k |   } | 
| 1257 | 64.5k |   return result; | 
| 1258 | 64.6k | } | 
| 1259 |  |  | 
/* Encode the code point c as UTF-8 into buf.
 *
 * Returns the number of bytes written (1 to 4), or 0 without touching buf
 * when c is negative or not below 0x110000.  buf must have room for the
 * bytes produced; no terminator is written.
 */
int FASTCALL
XmlUtf8Encode(int c, char *buf) {
  enum {
    /* minN is minimum legal resulting value for N byte sequence */
    min2 = 0x80,
    min3 = 0x800,
    min4 = 0x10000,
    limit = 0x110000
  };
  int len;
  int lead;
  int i;

  /* Callers are expected to have filtered both cases out already. */
  if (c < 0 || c >= limit)
    return 0; /* LCOV_EXCL_LINE */

  if (c < min2) {
    len = 1;
    lead = UTF8_cval1;
  } else if (c < min3) {
    len = 2;
    lead = UTF8_cval2;
  } else if (c < min4) {
    len = 3;
    lead = UTF8_cval3;
  } else {
    len = 4;
    lead = UTF8_cval4;
  }

  /* Fill continuation bytes from the end, six payload bits at a time. */
  for (i = len - 1; i > 0; i--) {
    buf[i] = (char)((c & 0x3f) | 0x80);
    c >>= 6;
  }
  /* The bits that remain go into the lead byte. */
  buf[0] = (char)(c | lead);
  return len;
}
| 1295 |  |  | 
/* Encode the code point charNum as UTF-16 into buf.
 *
 * Returns 1 after writing a single unit for values below 0x10000, 2 after
 * writing a surrogate pair for values below 0x110000, and 0 (buf untouched)
 * for negative or larger values.
 */
int FASTCALL
XmlUtf16Encode(int charNum, unsigned short *buf) {
  if (charNum < 0 || charNum >= 0x110000)
    return 0;

  if (charNum < 0x10000) {
    buf[0] = (unsigned short)charNum;
    return 1;
  }

  {
    /* 20-bit offset above the BMP, split 10/10 across the two surrogates. */
    const int offset = charNum - 0x10000;
    buf[0] = (unsigned short)(0xD800 + (offset >> 10));
    buf[1] = (unsigned short)(0xDC00 + (offset & 0x3FF));
  }
  return 2;
}
| 1312 |  |  | 
| 1313 |  | struct unknown_encoding { /* encoding object for a caller-described encoding */ | 
| 1314 |  |   struct normal_encoding normal; /* first member, so AS_UNKNOWN_ENCODING can cast an encoding pointer to this struct */ | 
| 1315 |  |   CONVERTER convert; /* invoked as convert(userData, p); the unknown_* helpers store its result in an int */ | 
| 1316 |  |   void *userData; /* passed unchanged as the first argument of convert */ | 
| 1317 |  |   unsigned short utf16[256]; /* NOTE(review): presumably the UTF-16 unit for each byte value - confirm where it is filled in */ | 
| 1318 |  |   char utf8[256][4]; /* per byte value; unknown_toUtf8 reads the first char as a count and falls back to convert() when it is 0 */ | 
| 1319 |  | }; | 
| 1320 |  |  | 
| 1321 | 0 | #define AS_UNKNOWN_ENCODING(enc) ((const struct unknown_encoding *)(enc)) /* read-only view of enc as an unknown_encoding */ | 
| 1322 |  |  | 
| 1323 |  | int | 
| 1324 | 0 | XmlSizeOfUnknownEncoding(void) { | 
| 1325 | 0 |   return sizeof(struct unknown_encoding); | 
| 1326 | 0 | } | 
| 1327 |  |  | 
| 1328 |  | static int PTRFASTCALL | 
| 1329 | 0 | unknown_isName(const ENCODING *enc, const char *p) { | 
| 1330 | 0 |   const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); | 
| 1331 | 0 |   int c = uenc->convert(uenc->userData, p); | 
| 1332 | 0 |   if (c & ~0xFFFF) | 
| 1333 | 0 |     return 0; | 
| 1334 | 0 |   return UCS2_GET_NAMING(namePages, c >> 8, c & 0xFF); | 
| 1335 | 0 | } | 
| 1336 |  |  | 
| 1337 |  | static int PTRFASTCALL | 
| 1338 | 0 | unknown_isNmstrt(const ENCODING *enc, const char *p) { | 
| 1339 | 0 |   const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); | 
| 1340 | 0 |   int c = uenc->convert(uenc->userData, p); | 
| 1341 | 0 |   if (c & ~0xFFFF) | 
| 1342 | 0 |     return 0; | 
| 1343 | 0 |   return UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xFF); | 
| 1344 | 0 | } | 
| 1345 |  |  | 
| 1346 |  | static int PTRFASTCALL | 
| 1347 | 0 | unknown_isInvalid(const ENCODING *enc, const char *p) { | 
| 1348 | 0 |   const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); | 
| 1349 | 0 |   int c = uenc->convert(uenc->userData, p); | 
| 1350 | 0 |   return (c & ~0xFFFF) || checkCharRefNumber(c) < 0; | 
| 1351 | 0 | } | 
| 1352 |  |  | 
| 1353 |  | static enum XML_Convert_Result PTRCALL | 
| 1354 |  | unknown_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, | 
| 1355 | 0 |                char **toP, const char *toLim) { | 
| 1356 | 0 |   const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); | 
| 1357 | 0 |   char buf[XML_UTF8_ENCODE_MAX]; | 
| 1358 | 0 |   for (;;) { | 
| 1359 | 0 |     const char *utf8; | 
| 1360 | 0 |     int n; | 
| 1361 | 0 |     if (*fromP == fromLim) | 
| 1362 | 0 |       return XML_CONVERT_COMPLETED; | 
| 1363 | 0 |     utf8 = uenc->utf8[(unsigned char)**fromP]; | 
| 1364 | 0 |     n = *utf8++; | 
| 1365 | 0 |     if (n == 0) { | 
| 1366 | 0 |       int c = uenc->convert(uenc->userData, *fromP); | 
| 1367 | 0 |       n = XmlUtf8Encode(c, buf); | 
| 1368 | 0 |       if (n > toLim - *toP) | 
| 1369 | 0 |         return XML_CONVERT_OUTPUT_EXHAUSTED; | 
| 1370 | 0 |       utf8 = buf; | 
| 1371 | 0 |       *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP] | 
| 1372 | 0 |                  - (BT_LEAD2 - 2)); | 
| 1373 | 0 |     } else { | 
| 1374 | 0 |       if (n > toLim - *toP) | 
| 1375 | 0 |         return XML_CONVERT_OUTPUT_EXHAUSTED; | 
| 1376 | 0 |       (*fromP)++; | 
| 1377 | 0 |     } | 
| 1378 | 0 |     memcpy(*toP, utf8, n); | 
| 1379 | 0 |     *toP += n; | 
| 1380 | 0 |   } | 
| 1381 | 0 | } | 
| 1382 |  |  | 
| 1383 |  | static enum XML_Convert_Result PTRCALL | 
| 1384 |  | unknown_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim, | 
| 1385 | 0 |                 unsigned short **toP, const unsigned short *toLim) { | 
| 1386 | 0 |   const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); | 
| 1387 | 0 |   while (*fromP < fromLim && *toP < toLim) { | 
| 1388 | 0 |     unsigned short c = uenc->utf16[(unsigned char)**fromP]; | 
| 1389 | 0 |     if (c == 0) { | 
| 1390 | 0 |       c = (unsigned short)uenc->convert(uenc->userData, *fromP); | 
| 1391 | 0 |       *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP] | 
| 1392 | 0 |                  - (BT_LEAD2 - 2)); | 
| 1393 | 0 |     } else | 
| 1394 | 0 |       (*fromP)++; | 
| 1395 | 0 |     *(*toP)++ = c; | 
| 1396 | 0 |   } | 
| 1397 |  | 
 | 
| 1398 | 0 |   if ((*toP == toLim) && (*fromP < fromLim)) | 
| 1399 | 0 |     return XML_CONVERT_OUTPUT_EXHAUSTED; | 
| 1400 | 0 |   else | 
| 1401 | 0 |     return XML_CONVERT_COMPLETED; | 
| 1402 | 0 | } | 
| 1403 |  |  | 
/* Build an encoding object for a user-defined encoding in the caller's
   memory block.

   mem      - storage for a struct unknown_encoding (XmlSizeOfUnknownEncoding
              reports its size); it is overwritten starting with a copy of
              latin1_encoding.
   table    - 256 entries, one per input byte:
                >= 0       the value the byte stands for on its own
                -1         the byte is malformed
                -2 .. -4   the byte starts a sequence of 2 .. 4 bytes that
                           has to be decoded by convert
   convert  - decoder for multi-byte sequences; may be null only if the
              table contains no -2 .. -4 entries.
   userData - stored and handed back to convert.

   Returns a pointer to the embedded ENCODING on success, or 0 if the table
   is rejected.  On rejection mem has already been partially written. */
ENCODING *
XmlInitUnknownEncoding(void *mem, int *table, CONVERTER convert,
                       void *userData) {
  int i;
  struct unknown_encoding *e = (struct unknown_encoding *)mem;
  /* Start from the Latin-1 encoding and override it entry by entry. */
  memcpy(mem, &latin1_encoding, sizeof(struct normal_encoding));
  /* Every byte below 128 whose Latin-1 type is something other than
     BT_OTHER or BT_NONXML must map to itself. */
  for (i = 0; i < 128; i++)
    if (latin1_encoding.type[i] != BT_OTHER
        && latin1_encoding.type[i] != BT_NONXML && table[i] != i)
      return 0;
  for (i = 0; i < 256; i++) {
    int c = table[i];
    if (c == -1) {
      e->normal.type[i] = BT_MALFORM;
      /* This shouldn't really get used. */
      e->utf16[i] = 0xFFFF;
      e->utf8[i][0] = 1;
      e->utf8[i][1] = 0;
    } else if (c < 0) {
      if (c < -4)
        return 0;
      /* Multi-byte sequences need a converter function */
      if (! convert)
        return 0;
      /* -2, -3, -4 become BT_LEAD2, BT_LEAD2 + 1, BT_LEAD2 + 2; the
         converters turn that back into a sequence length of 2, 3 or 4. */
      e->normal.type[i] = (unsigned char)(BT_LEAD2 - (c + 2));
      /* A zero length / zero value tells the converters to call convert. */
      e->utf8[i][0] = 0;
      e->utf16[i] = 0;
    } else if (c < 0x80) {
      /* The same identity rule as above, applied to the target value: such
         a value may only be produced by the byte equal to it. */
      if (latin1_encoding.type[c] != BT_OTHER
          && latin1_encoding.type[c] != BT_NONXML && c != i)
        return 0;
      e->normal.type[i] = latin1_encoding.type[c];
      e->utf8[i][0] = 1;
      e->utf8[i][1] = (char)c;
      /* 0 is stored as 0xFFFF -- presumably because a 0 entry in utf16[]
         is what unknown_toUtf16 treats as the lead-byte marker. */
      e->utf16[i] = (unsigned short)(c == 0 ? 0xFFFF : c);
    } else if (checkCharRefNumber(c) < 0) {
      e->normal.type[i] = BT_NONXML;
      /* This shouldn't really get used. */
      e->utf16[i] = 0xFFFF;
      e->utf8[i][0] = 1;
      e->utf8[i][1] = 0;
    } else {
      /* A single byte may only stand for a value that fits in 16 bits. */
      if (c > 0xFFFF)
        return 0;
      if (UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xff))
        e->normal.type[i] = BT_NMSTRT;
      else if (UCS2_GET_NAMING(namePages, c >> 8, c & 0xff))
        e->normal.type[i] = BT_NAME;
      else
        e->normal.type[i] = BT_OTHER;
      /* Precompute the UTF-8 form: length in element 0, bytes after it. */
      e->utf8[i][0] = (char)XmlUtf8Encode(c, e->utf8[i] + 1);
      e->utf16[i] = (unsigned short)c;
    }
  }
  e->userData = userData;
  e->convert = convert;
  /* The multi-byte classification hooks are replaced only when there is a
     converter to back them; otherwise the Latin-1 ones copied above stay. */
  if (convert) {
    e->normal.isName2 = unknown_isName;
    e->normal.isName3 = unknown_isName;
    e->normal.isName4 = unknown_isName;
    e->normal.isNmstrt2 = unknown_isNmstrt;
    e->normal.isNmstrt3 = unknown_isNmstrt;
    e->normal.isNmstrt4 = unknown_isNmstrt;
    e->normal.isInvalid2 = unknown_isInvalid;
    e->normal.isInvalid3 = unknown_isInvalid;
    e->normal.isInvalid4 = unknown_isInvalid;
  }
  e->normal.enc.utf8Convert = unknown_toUtf8;
  e->normal.enc.utf16Convert = unknown_toUtf16;
  return &(e->normal.enc);
}
| 1475 |  |  | 
/* Indices of the built-in encodings.  The values from ISO_8859_1_ENC to
   UTF_16LE_ENC double as positions in getEncodingIndex's name array.

   If this enumeration is changed, getEncodingIndex and encodings
   must also be changed. */
enum {
  UNKNOWN_ENC = -1,   /* getEncodingIndex: name not in its table */
  ISO_8859_1_ENC = 0,
  US_ASCII_ENC,
  UTF_8_ENC,
  UTF_16_ENC,
  UTF_16BE_ENC,
  UTF_16LE_ENC,
  /* must match encodingNames up to here */
  NO_ENC              /* getEncodingIndex: no name given (NULL) */
};
| 1489 |  |  | 
/* NUL-terminated names of the built-in encodings, spelled with the ASCII_*
   character constants rather than string literals.  getEncodingIndex
   compares a candidate name against each of these with streqci. */

/* "ISO-8859-1" */
static const char KW_ISO_8859_1[]
    = {ASCII_I, ASCII_S, ASCII_O,     ASCII_MINUS, ASCII_8, ASCII_8,
       ASCII_5, ASCII_9, ASCII_MINUS, ASCII_1,     '\0'};
/* "US-ASCII" */
static const char KW_US_ASCII[]
    = {ASCII_U, ASCII_S, ASCII_MINUS, ASCII_A, ASCII_S,
       ASCII_C, ASCII_I, ASCII_I,     '\0'};
/* "UTF-8" */
static const char KW_UTF_8[]
    = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_8, '\0'};
/* "UTF-16" */
static const char KW_UTF_16[]
    = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, '\0'};
/* "UTF-16BE" */
static const char KW_UTF_16BE[]
    = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1,
       ASCII_6, ASCII_B, ASCII_E, '\0'};
/* "UTF-16LE" */
static const char KW_UTF_16LE[]
    = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1,
       ASCII_6, ASCII_L, ASCII_E, '\0'};
| 1506 |  |  | 
| 1507 |  | static int FASTCALL | 
| 1508 | 21.5k | getEncodingIndex(const char *name) { | 
| 1509 | 21.5k |   static const char *const encodingNames[] = { | 
| 1510 | 21.5k |       KW_ISO_8859_1, KW_US_ASCII, KW_UTF_8, KW_UTF_16, KW_UTF_16BE, KW_UTF_16LE, | 
| 1511 | 21.5k |   }; | 
| 1512 | 21.5k |   int i; | 
| 1513 | 21.5k |   if (name == NULL) | 
| 1514 | 20.0k |     return NO_ENC; | 
| 1515 | 1.80k |   for (i = 0; i < (int)(sizeof(encodingNames) / sizeof(encodingNames[0])); i++) | 
| 1516 | 1.76k |     if (streqci(name, encodingNames[i])) | 
| 1517 | 1.41k |       return i; | 
| 1518 | 48 |   return UNKNOWN_ENC; | 
| 1519 | 1.46k | } | 
| 1520 |  |  | 
/* For binary compatibility, we store the index of the encoding
   specified at initialization in the isUtf16 member.

   INIT_ENC_INDEX(enc) reads that index back as an int.
   SET_INIT_ENC_INDEX(enc, i) stores i, narrowed to char.  The argument is
   parenthesized so that an expression such as (a >> b) is narrowed as a
   whole instead of having the cast bind to its first operand only.
*/

#define INIT_ENC_INDEX(enc) ((int)(enc)->initEnc.isUtf16)
#define SET_INIT_ENC_INDEX(enc, i) ((enc)->initEnc.isUtf16 = (char)(i))
| 1527 |  |  | 
/* This is what detects the encoding.  encodingTable maps from
   encoding indices to encodings; INIT_ENC_INDEX(enc) is the index of
   the external (protocol) specified encoding; state is
   XML_CONTENT_STATE if we're parsing an external text entity, and
   XML_PROLOG_STATE otherwise.

   Looks at the first bytes of [ptr, end) for a byte order mark or a
   UTF-16 looking start, stores the chosen encoding through enc->encPtr
   and returns:
     XML_TOK_NONE     if there is no input at all;
     XML_TOK_PARTIAL  if more bytes are needed to decide;
     XML_TOK_BOM      if a byte order mark was recognized, with
                      *nextTokPtr set just past it;
     otherwise whatever XmlTok returns for the chosen encoding.
*/

static int
initScan(const ENCODING *const *encodingTable, const INIT_ENCODING *enc,
         int state, const char *ptr, const char *end, const char **nextTokPtr) {
  const ENCODING **encPtr;

  if (ptr >= end)
    return XML_TOK_NONE;
  encPtr = enc->encPtr;
  if (ptr + 1 == end) {
    /* only a single byte available for auto-detection */
#ifndef XML_DTD /* FIXME */
    /* a well-formed document entity must have more than one byte */
    if (state != XML_CONTENT_STATE)
      return XML_TOK_PARTIAL;
#endif
    /* so we're parsing an external text entity... */
    /* if UTF-16 was externally specified, then we need at least 2 bytes */
    switch (INIT_ENC_INDEX(enc)) {
    case UTF_16_ENC:
    case UTF_16LE_ENC:
    case UTF_16BE_ENC:
      return XML_TOK_PARTIAL;
    }
    /* A byte that could start a BOM or a UTF-16 "<" means we must wait for
       more input; any other byte falls out to the externally specified
       encoding at the bottom of the function. */
    switch ((unsigned char)*ptr) {
    case 0xFE:
    case 0xFF:
    case 0xEF: /* possibly first byte of UTF-8 BOM */
      if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC && state == XML_CONTENT_STATE)
        break;
      /* fall through */
    case 0x00:
    case 0x3C:
      return XML_TOK_PARTIAL;
    }
  } else {
    /* At least two bytes: examine them together as one 16-bit value. */
    switch (((unsigned char)ptr[0] << 8) | (unsigned char)ptr[1]) {
    case 0xFEFF:
      /* big-endian UTF-16 byte order mark */
      if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC && state == XML_CONTENT_STATE)
        break;
      *nextTokPtr = ptr + 2;
      *encPtr = encodingTable[UTF_16BE_ENC];
      return XML_TOK_BOM;
    /* 00 3C is handled in the default case */
    case 0x3C00:
      /* "<" followed by a zero byte: treated as little-endian UTF-16 */
      if ((INIT_ENC_INDEX(enc) == UTF_16BE_ENC
           || INIT_ENC_INDEX(enc) == UTF_16_ENC)
          && state == XML_CONTENT_STATE)
        break;
      *encPtr = encodingTable[UTF_16LE_ENC];
      return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
    case 0xFFFE:
      /* little-endian UTF-16 byte order mark */
      if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC && state == XML_CONTENT_STATE)
        break;
      *nextTokPtr = ptr + 2;
      *encPtr = encodingTable[UTF_16LE_ENC];
      return XML_TOK_BOM;
    case 0xEFBB:
      /* Maybe a UTF-8 BOM (EF BB BF) */
      /* If there's an explicitly specified (external) encoding
         of ISO-8859-1 or some flavour of UTF-16
         and this is an external text entity,
         don't look for the BOM,
         because it might be legal data.
      */
      if (state == XML_CONTENT_STATE) {
        int e = INIT_ENC_INDEX(enc);
        if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC || e == UTF_16LE_ENC
            || e == UTF_16_ENC)
          break;
      }
      /* The third BOM byte has not arrived yet. */
      if (ptr + 2 == end)
        return XML_TOK_PARTIAL;
      if ((unsigned char)ptr[2] == 0xBF) {
        *nextTokPtr = ptr + 3;
        *encPtr = encodingTable[UTF_8_ENC];
        return XML_TOK_BOM;
      }
      break;
    default:
      if (ptr[0] == '\0') {
        /* 0 isn't a legal data character. Furthermore a document
           entity can only start with ASCII characters.  So the only
           way this can fail to be big-endian UTF-16 is if it's an
           external parsed general entity that's labelled as
           UTF-16LE.
        */
        if (state == XML_CONTENT_STATE && INIT_ENC_INDEX(enc) == UTF_16LE_ENC)
          break;
        *encPtr = encodingTable[UTF_16BE_ENC];
        return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
      } else if (ptr[1] == '\0') {
        /* We could recover here in the case:
            - parsing an external entity
            - second byte is 0
            - no externally specified encoding
            - no encoding declaration
           by assuming UTF-16LE.  But we don't, because this would mean when
           presented just with a single byte, we couldn't reliably determine
           whether we needed further bytes.
        */
        if (state == XML_CONTENT_STATE)
          break;
        *encPtr = encodingTable[UTF_16LE_ENC];
        return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
      }
      break;
    }
  }
  /* Nothing was auto-detected: use the encoding recorded at
     initialization. */
  *encPtr = encodingTable[INIT_ENC_INDEX(enc)];
  return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
}
| 1646 |  |  | 
/* First inclusion of xmltok_ns.c: NS(x) and ns(x) both expand to plain x.
   Presumably that file wraps its identifiers in these macros, so this pass
   yields the variants without a name suffix -- confirm against
   xmltok_ns.c.  XML_TOK_NS_C is defined only for the duration of the
   include, and all three macros are removed again afterwards. */
#define NS(x) x
#define ns(x) x
#define XML_TOK_NS_C
#include "xmltok_ns.c"
#undef XML_TOK_NS_C
#undef NS
#undef ns
| 1654 |  |  | 
| 1655 |  | #ifdef XML_NS | 
| 1656 |  |  | 
/* Second inclusion of xmltok_ns.c, only when XML_NS is defined: NS(x)
   appends "NS" and ns(x) appends "_ns" to its argument, so whatever
   xmltok_ns.c names through these macros gets a distinct, suffixed
   identifier this time. */
#  define NS(x) x##NS
#  define ns(x) x##_ns

#  define XML_TOK_NS_C
#  include "xmltok_ns.c"
#  undef XML_TOK_NS_C

#  undef NS
#  undef ns
| 1666 |  |  | 
| 1667 |  | ENCODING * | 
| 1668 |  | XmlInitUnknownEncodingNS(void *mem, int *table, CONVERTER convert, | 
| 1669 |  |                          void *userData) { | 
| 1670 |  |   ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData); | 
| 1671 |  |   if (enc) | 
| 1672 |  |     ((struct normal_encoding *)enc)->type[ASCII_COLON] = BT_COLON; | 
| 1673 |  |   return enc; | 
| 1674 |  | } | 
| 1675 |  |  | 
| 1676 |  | #endif /* XML_NS */ |