Coverage Report

Created: 2023-12-13 20:02

/src/libxml2/encoding.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * encoding.c : implements the encoding conversion functions needed for XML
3
 *
4
 * Related specs:
5
 * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6
 * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7
 * [ISO-10646]    UTF-8 and UTF-16 in Annexes
8
 * [ISO-8859-1]   ISO Latin-1 characters codes.
9
 * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
10
 *                Worldwide Character Encoding -- Version 1.0", Addison-
11
 *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
12
 *                described in Unicode Technical Report #4.
13
 * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
14
 *                Information Interchange, ANSI X3.4-1986.
15
 *
16
 * See Copyright for the status of this software.
17
 *
18
 * daniel@veillard.com
19
 *
20
 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
21
 */
22
23
#define IN_LIBXML
24
#include "libxml.h"
25
26
#include <string.h>
27
#include <limits.h>
28
#include <ctype.h>
29
#include <stdlib.h>
30
31
#ifdef LIBXML_ICONV_ENABLED
32
#include <errno.h>
33
#endif
34
35
#include <libxml/encoding.h>
36
#include <libxml/xmlmemory.h>
37
#ifdef LIBXML_HTML_ENABLED
38
#include <libxml/HTMLparser.h>
39
#endif
40
#include <libxml/globals.h>
41
#include <libxml/xmlerror.h>
42
43
#include "private/buf.h"
44
#include "private/enc.h"
45
#include "private/error.h"
46
47
#ifdef LIBXML_ICU_ENABLED
48
#include <unicode/ucnv.h>
49
/* Size of pivot buffer, same as icu/source/common/ucnv.cpp CHUNK_SIZE */
50
#define ICU_PIVOT_BUF_SIZE 1024
51
typedef struct _uconv_t uconv_t;
52
struct _uconv_t {
53
  UConverter *uconv; /* for conversion between an encoding and UTF-16 */
54
  UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */
55
  UChar      pivot_buf[ICU_PIVOT_BUF_SIZE];
56
  UChar      *pivot_source;
57
  UChar      *pivot_target;
58
};
59
#endif
60
61
static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
62
static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
63
64
/*
 * One entry of the encoding alias table: maps an alias string to the
 * encoding name it stands for.
 */
typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
struct _xmlCharEncodingAlias {
    const char *name;   /* the encoding name */
    const char *alias;  /* the alias registered for @name */
};
70
71
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
72
static int xmlCharEncodingAliasesNb = 0;
73
static int xmlCharEncodingAliasesMax = 0;
74
75
#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
76
#if 0
77
#define DEBUG_ENCODING  /* Define this to get encoding traces */
78
#endif
79
#else
80
#ifdef LIBXML_ISO8859X_ENABLED
81
static void xmlRegisterCharEncodingHandlersISO8859x (void);
82
#endif
83
#endif
84
85
static int xmlLittleEndian = 1;
86
87
/**
88
 * xmlEncodingErrMemory:
89
 * @extra:  extra information
90
 *
91
 * Handle an out of memory condition
92
 */
93
static void
94
xmlEncodingErrMemory(const char *extra)
95
0
{
96
0
    __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
97
0
}
98
99
/**
100
 * xmlErrEncoding:
101
 * @error:  the error number
102
 * @msg:  the error message
103
 *
104
 * n encoding error
105
 */
106
static void LIBXML_ATTR_FORMAT(2,0)
107
xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
108
20.3k
{
109
20.3k
    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
110
20.3k
                    XML_FROM_I18N, error, XML_ERR_FATAL,
111
20.3k
                    NULL, 0, val, NULL, NULL, 0, 0, msg, val);
112
20.3k
}
113
114
#ifdef LIBXML_ICU_ENABLED
115
static uconv_t*
116
openIcuConverter(const char* name, int toUnicode)
117
{
118
  UErrorCode status = U_ZERO_ERROR;
119
  uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
120
  if (conv == NULL)
121
    return NULL;
122
123
  conv->pivot_source = conv->pivot_buf;
124
  conv->pivot_target = conv->pivot_buf;
125
126
  conv->uconv = ucnv_open(name, &status);
127
  if (U_FAILURE(status))
128
    goto error;
129
130
  status = U_ZERO_ERROR;
131
  if (toUnicode) {
132
    ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
133
                        NULL, NULL, NULL, &status);
134
  }
135
  else {
136
    ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
137
                        NULL, NULL, NULL, &status);
138
  }
139
  if (U_FAILURE(status))
140
    goto error;
141
142
  status = U_ZERO_ERROR;
143
  conv->utf8 = ucnv_open("UTF-8", &status);
144
  if (U_SUCCESS(status))
145
    return conv;
146
147
error:
148
  if (conv->uconv)
149
    ucnv_close(conv->uconv);
150
  xmlFree(conv);
151
  return NULL;
152
}
153
154
static void
155
closeIcuConverter(uconv_t *conv)
156
{
157
  if (conv != NULL) {
158
    ucnv_close(conv->uconv);
159
    ucnv_close(conv->utf8);
160
    xmlFree(conv);
161
  }
162
}
163
#endif /* LIBXML_ICU_ENABLED */
164
165
/************************************************************************
166
 *                  *
167
 *    Conversions To/From UTF8 encoding     *
168
 *                  *
169
 ************************************************************************/
170
171
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out. Every byte below 0x80 is copied unchanged, so the
 * conversion needs exactly one output byte per input byte.
 *
 * Conversion stops early, without error, when @out is full.
 *
 * Returns the number of bytes written if success, or -1 if a byte
 *     >= 0x80 is found in the input.
 * The value of @inlen after return is the number of octets consumed
 *     (on error, the octets preceding the offending byte).
 * The value of @outlen after return is the number of octets produced.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    int limit;
    int i;

    /*
     * Input and output advance in lockstep, so a single index bounded
     * by the smaller of the two lengths is enough. No safety margin is
     * kept in the output buffer: a margin would prevent any progress
     * when the buffer is only a few bytes long.
     */
    limit = (*inlen < *outlen) ? *inlen : *outlen;

    for (i = 0; i < limit; i++) {
        if (in[i] >= 0x80) {
            /* not ASCII: report what was converted so far */
            *outlen = i;
            *inlen = i;
            return(-1);
        }
        out[i] = in[i];
    }

    /* a negative length leaves the loop untouched: report no progress */
    if (i < 0)
        i = 0;
    *outlen = i;
    *inlen = i;
    return(*outlen);
}
215
216
#ifdef LIBXML_OUTPUT_ENABLED
217
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars, or NULL for the
 *       initialization call
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out.
 *
 * Conversion stops early, without error, when @out is full or when the
 * input ends in the middle of a multi-byte sequence; the incomplete
 * sequence is left unconsumed.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (malformed UTF-8 or a character outside ASCII), or -1 if
 *     @out, @outlen or @inlen is NULL.
 * The value of @inlen after return is the number of octets consumed,
 *     counting only completely converted characters.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if (d < 0x80) {
            c = d;
            trailing = 0;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto invalid;
        } else if (d < 0xE0) {
            c = d & 0x1F;
            trailing = 1;
        } else if (d < 0xF0) {
            c = d & 0x0F;
            trailing = 2;
        } else if (d < 0xF8) {
            c = d & 0x07;
            trailing = 3;
        } else {
            /* no chance for this in Ascii */
            goto invalid;
        }

        /* sequence split across chunks: wait for the rest */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /*
             * A byte that is not a continuation byte makes the whole
             * sequence invalid. It must not be swallowed, and the
             * partially decoded value must not be emitted.
             */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c >= 0x80) {
            /* no chance for this in Ascii */
            goto invalid;
        }
        if (out >= outend)
            break;
        *out++ = c;
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(-2);
}
300
#endif /* LIBXML_OUTPUT_ENABLED */
301
302
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied unchanged, bytes from
 * 0x80 to 0xFF become a two byte UTF-8 sequence.
 *
 * Conversion stops early, without error, as soon as the next character
 * does not fit in @out.
 *
 * Returns the number of bytes written if success, or -1 if one of the
 *     arguments is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* base = in;
    unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* runend;
    int room, left;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    outend = out + *outlen;
    inend = in + (*inlen);

    while (in < inend) {
        /*
         * Work with the remaining space instead of comparing against
         * "outend - 1": with an empty output buffer that expression
         * would point before the start of @out.
         */
        room = (int) (outend - out);

        if (*in >= 0x80) {
            if (room < 2)
                break;
            *out++ = (((*in) >>  6) & 0x1F) | 0xC0;
            *out++ = ((*in) & 0x3F) | 0x80;
            ++in;
            continue;
        }

        if (room < 1)
            break;

        /* Fast path: copy a run of ASCII bytes bounded by both buffers. */
        left = (int) (inend - in);
        runend = in + ((left < room) ? left : room);
        while ((in < runend) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }

    *outlen = out - outstart;
    *inlen = in - base;
    return(*outlen);
}
350
351
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars, or NULL for the
 *        initialization call
 * @inlenb:  the length of @inb in bytes
 *
 * Pass-through "conversion" for UTF-8: copies as many bytes as fit.
 * The data is not validated, and a copy limited by @outlen may end in
 * the middle of a multi-byte sequence.
 *
 * Returns the number of bytes copied, or -1 if @out, @outlen or @inlenb
 *     is NULL or the usable length is negative.
 *     The value of *inlenb and *outlen after a successful return is the
 *     number of octets copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int count;

    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);

    if (inb == NULL) {
        /* inb == NULL means output is initialized. */
        *outlen = 0;
        *inlenb = 0;
        return(0);
    }

    /* copy the smaller of the two lengths */
    count = (*inlenb < *outlen) ? *inlenb : *outlen;
    if (count < 0)
        return(-1);

    /*
     * FIXME: Conversion functions must assure valid UTF-8, so we have
     * to check for UTF-8 validity. Preferably, this converter shouldn't
     * be used at all.
     */
    memcpy(out, inb, count);

    *inlenb = count;
    *outlen = count;
    return(count);
}
397
398
399
#ifdef LIBXML_OUTPUT_ENABLED
400
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars, or NULL for the
 *       initialization call
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out.
 *
 * Conversion stops early, without error, when @out is full or when the
 * input ends in the middle of a multi-byte sequence.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (malformed sequence or character above U+00FF), or -1 if
 *     @out, @outlen or @inlen is NULL.
 * The value of @inlen after return is the number of octets consumed,
 *     counting only completely converted characters.
 * The value of @outlen after return is the number of octets produced.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char *src, *srcEnd, *committed;
    unsigned char *dst;
    const unsigned char *dstEnd;
    unsigned int lead, cont, value;
    int extra;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    if (in == NULL) {
        /* initialization call: nothing to emit */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    src = in;
    committed = in;
    srcEnd = in + (*inlen);
    dst = out;
    dstEnd = out + (*outlen);

    while (src < srcEnd) {
        lead = *src++;

        /* classify the lead byte and find how many bytes follow */
        if (lead < 0x80) {
            value = lead;
            extra = 0;
        } else if (lead < 0xC0) {
            /* trailing byte in leading position */
            goto failed;
        } else if (lead < 0xE0) {
            value = lead & 0x1F;
            extra = 1;
        } else if (lead < 0xF0) {
            value = lead & 0x0F;
            extra = 2;
        } else if (lead < 0xF8) {
            value = lead & 0x07;
            extra = 3;
        } else {
            /* no chance for this in IsoLat1 */
            goto failed;
        }

        /* incomplete sequence at the end of the chunk */
        if (srcEnd - src < extra)
            break;

        while (extra > 0) {
            cont = *src++;
            if ((cont & 0xC0) != 0x80)
                goto failed;
            value = (value << 6) | (cont & 0x3F);
            extra--;
        }

        /* assertion: value is a single UCS-4 value */
        if (value > 0xFF) {
            /* no chance for this in IsoLat1 */
            goto failed;
        }
        if (dst >= dstEnd)
            break;
        *dst++ = value;
        committed = src;
    }

    *outlen = dst - out;
    *inlen = committed - in;
    return(*outlen);

failed:
    *outlen = dst - out;
    *inlen = committed - in;
    return(-2);
}
489
#endif /* LIBXML_OUTPUT_ENABLED */
490
491
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is read one byte at a time, so
 * @inb needs no particular alignment and the result does not depend on
 * the byte order of the host.
 *
 * A trailing odd byte and a high surrogate whose second half has not
 * arrived yet are left unconsumed. Conversion also stops, without
 * error, once fewer than six bytes remain free in @out.
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 *     (a high surrogate is not followed by a low surrogate).
 *     The value of *inlenb after return is the number of octets consumed.
 *     The value of *outlen after return is the number of octets produced.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* processed = inb;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int bits;

    if (*outlen == 0) {
        *inlenb = 0;
        return(0);
    }
    /* only whole 16-bit units are converted */
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + *inlenb;

    /*
     * The "+ 5" margin guarantees room for the longest UTF-8 sequence
     * (4 bytes), so the writes below need no further bounds checks.
     */
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
        c = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
        in += 2;

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend) {           /* handle split multi-byte characters */
                break;
            }
            d = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
            c &= 0x03FF;
            c <<= 10;
            c |= d & 0x03FF;
            c += 0x10000;
        }

        /* assertion: c is a single UCS-4 value */
        if      (c <    0x80) {  *out++=  c;                bits= -6; }
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }

        for ( ; bits >= 0; bits-= 6) {
            *out++= ((c >> bits) & 0x3F) | 0x80;
        }
        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(*outlen);
}
583
584
#ifdef LIBXML_OUTPUT_ENABLED
585
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars, or NULL for the
 *       initialization call
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. The output is written one byte at a time, so
 * @outb needs no particular alignment and the result does not depend on
 * the byte order of the host. No byte order mark is produced.
 *
 * Conversion stops early, without error, when the next character does
 * not fit in @outb or when the input ends in the middle of a multi-byte
 * sequence.
 *
 * Returns the number of bytes written, -2 if the transcoding failed
 *     (malformed UTF-8 or a code point above 0x10FFFF), or -1 if @outb,
 *     @outlen or @inlen is NULL.
 *     The value of *inlen after return is the number of octets consumed,
 *     the value of *outlen the number of octets produced.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units can be written */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d = *in++;
        if (d < 0x80) {
            c = d;
            trailing = 0;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto invalid;
        } else if (d < 0xE0) {
            c = d & 0x1F;
            trailing = 1;
        } else if (d < 0xF0) {
            c = d & 0x0F;
            trailing = 2;
        } else if (d < 0xF8) {
            c = d & 0x07;
            trailing = 3;
        } else {
            /* no chance for this in UTF-16 */
            goto invalid;
        }

        /* sequence split across chunks: wait for the rest */
        if (inend - in < trailing) {
            break;
        }

        for ( ; trailing; trailing--) {
            d = *in++;
            /* a non-continuation byte invalidates the whole sequence */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            out[0] = (unsigned char) (c & 0xFF);
            out[1] = (unsigned char) (c >> 8);
            out += 2;
        } else if (c < 0x110000) {
            /* needs a surrogate pair */
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            out[0] = (unsigned char) (hi & 0xFF);
            out[1] = (unsigned char) (hi >> 8);
            out[2] = (unsigned char) (lo & 0xFF);
            out[3] = (unsigned char) (lo >> 8);
            out += 4;
        } else {
            /* beyond the range UTF-16 can represent */
            goto invalid;
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
693
694
/**
695
 * UTF8ToUTF16:
696
 * @outb:  a pointer to an array of bytes to store the result
697
 * @outlen:  the length of @outb
698
 * @in:  a pointer to an array of UTF-8 chars
699
 * @inlen:  the length of @in
700
 *
701
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
702
 * block of chars out.
703
 *
704
 * Returns the number of bytes written, or -1 if lack of space, or -2
705
 *     if the transcoding failed.
706
 */
707
static int
708
UTF8ToUTF16(unsigned char* outb, int *outlen,
709
            const unsigned char* in, int *inlen)
710
0
{
711
0
    if (in == NULL) {
712
  /*
713
   * initialization, add the Byte Order Mark for UTF-16LE
714
   */
715
0
        if (*outlen >= 2) {
716
0
      outb[0] = 0xFF;
717
0
      outb[1] = 0xFE;
718
0
      *outlen = 2;
719
0
      *inlen = 0;
720
#ifdef DEBUG_ENCODING
721
            xmlGenericError(xmlGenericErrorContext,
722
        "Added FFFE Byte Order Mark\n");
723
#endif
724
0
      return(2);
725
0
  }
726
0
  *outlen = 0;
727
0
  *inlen = 0;
728
0
  return(0);
729
0
    }
730
0
    return (UTF8ToUTF16LE(outb, outlen, in, inlen));
731
0
}
732
#endif /* LIBXML_OUTPUT_ENABLED */
733
734
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is read one byte at a time, so
 * @inb needs no particular alignment and the result does not depend on
 * the byte order of the host.
 *
 * A trailing odd byte and a high surrogate whose second half has not
 * arrived yet are left unconsumed. Conversion also stops, without
 * error, once fewer than six bytes remain free in @out.
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 *     (a high surrogate is not followed by a low surrogate).
 * The value of *inlenb after return is the number of octets consumed.
 * The value of *outlen after return is the number of octets produced.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* processed = inb;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int bits;

    if (*outlen == 0) {
        *inlenb = 0;
        return(0);
    }
    /* only whole 16-bit units are converted */
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + *inlenb;

    /*
     * The "+ 5" margin guarantees room for the longest UTF-8 sequence
     * (4 bytes), so the writes below need no further bounds checks.
     */
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
        c = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
        in += 2;

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend) {           /* handle split multi-byte characters */
                break;
            }
            d = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
            c &= 0x03FF;
            c <<= 10;
            c |= d & 0x03FF;
            c += 0x10000;
        }

        /* assertion: c is a single UCS-4 value */
        if      (c <    0x80) {  *out++=  c;                bits= -6; }
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }

        for ( ; bits >= 0; bits-= 6) {
            *out++= ((c >> bits) & 0x3F) | 0x80;
        }
        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(*outlen);
}
826
827
#ifdef LIBXML_OUTPUT_ENABLED
828
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars, or NULL for the
 *       initialization call
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. The output is written one byte at a time, so
 * @outb needs no particular alignment and the result does not depend on
 * the byte order of the host. No byte order mark is produced.
 *
 * Conversion stops early, without error, when the next character does
 * not fit in @outb or when the input ends in the middle of a multi-byte
 * sequence.
 *
 * Returns the number of bytes written, -2 if the transcoding failed
 *     (malformed UTF-8 or a code point above 0x10FFFF), or -1 if @outb,
 *     @outlen or @inlen is NULL.
 *     The value of *inlen after return is the number of octets consumed,
 *     the value of *outlen the number of octets produced -- always in
 *     bytes, including on the error paths.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units can be written */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d = *in++;
        if (d < 0x80) {
            c = d;
            trailing = 0;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto invalid;
        } else if (d < 0xE0) {
            c = d & 0x1F;
            trailing = 1;
        } else if (d < 0xF0) {
            c = d & 0x0F;
            trailing = 2;
        } else if (d < 0xF8) {
            c = d & 0x07;
            trailing = 3;
        } else {
            /* no chance for this in UTF-16 */
            goto invalid;
        }

        /* sequence split across chunks: wait for the rest */
        if (inend - in < trailing) {
            break;
        }

        for ( ; trailing; trailing--) {
            d = *in++;
            /* a non-continuation byte invalidates the whole sequence */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            out[0] = (unsigned char) (c >> 8);
            out[1] = (unsigned char) (c & 0xFF);
            out += 2;
        } else if (c < 0x110000) {
            /* needs a surrogate pair */
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            out[0] = (unsigned char) (hi >> 8);
            out[1] = (unsigned char) (hi & 0xFF);
            out[2] = (unsigned char) (lo >> 8);
            out[3] = (unsigned char) (lo & 0xFF);
            out += 4;
        } else {
            /* beyond the range UTF-16 can represent */
            goto invalid;
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    /* out is a byte pointer, so this difference is already in bytes */
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
933
#endif /* LIBXML_OUTPUT_ENABLED */
934
935
/************************************************************************
936
 *                  *
937
 *    Generic encoding handling routines      *
938
 *                  *
939
 ************************************************************************/
940
941
/**
942
 * xmlDetectCharEncoding:
943
 * @in:  a pointer to the first bytes of the XML entity, must be at least
944
 *       2 bytes long (at least 4 if encoding is UTF4 variant).
945
 * @len:  pointer to the length of the buffer
946
 *
947
 * Guess the encoding of the entity using the first bytes of the entity content
948
 * according to the non-normative appendix F of the XML-1.0 recommendation.
949
 *
950
 * Returns one of the XML_CHAR_ENCODING_... values.
951
 */
952
xmlCharEncoding
953
xmlDetectCharEncoding(const unsigned char* in, int len)
954
1.40M
{
955
1.40M
    if (in == NULL)
956
0
        return(XML_CHAR_ENCODING_NONE);
957
1.40M
    if (len >= 4) {
958
1.40M
  if ((in[0] == 0x00) && (in[1] == 0x00) &&
959
1.40M
      (in[2] == 0x00) && (in[3] == 0x3C))
960
452
      return(XML_CHAR_ENCODING_UCS4BE);
961
1.40M
  if ((in[0] == 0x3C) && (in[1] == 0x00) &&
962
1.40M
      (in[2] == 0x00) && (in[3] == 0x00))
963
1.18k
      return(XML_CHAR_ENCODING_UCS4LE);
964
1.40M
  if ((in[0] == 0x00) && (in[1] == 0x00) &&
965
1.40M
      (in[2] == 0x3C) && (in[3] == 0x00))
966
114
      return(XML_CHAR_ENCODING_UCS4_2143);
967
1.40M
  if ((in[0] == 0x00) && (in[1] == 0x3C) &&
968
1.40M
      (in[2] == 0x00) && (in[3] == 0x00))
969
156
      return(XML_CHAR_ENCODING_UCS4_3412);
970
1.40M
  if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
971
1.40M
      (in[2] == 0xA7) && (in[3] == 0x94))
972
7.02k
      return(XML_CHAR_ENCODING_EBCDIC);
973
1.40M
  if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
974
1.40M
      (in[2] == 0x78) && (in[3] == 0x6D))
975
357k
      return(XML_CHAR_ENCODING_UTF8);
976
  /*
977
   * Although not part of the recommendation, we also
978
   * attempt an "auto-recognition" of UTF-16LE and
979
   * UTF-16BE encodings.
980
   */
981
1.04M
  if ((in[0] == 0x3C) && (in[1] == 0x00) &&
982
1.04M
      (in[2] == 0x3F) && (in[3] == 0x00))
983
834
      return(XML_CHAR_ENCODING_UTF16LE);
984
1.04M
  if ((in[0] == 0x00) && (in[1] == 0x3C) &&
985
1.04M
      (in[2] == 0x00) && (in[3] == 0x3F))
986
619
      return(XML_CHAR_ENCODING_UTF16BE);
987
1.04M
    }
988
1.04M
    if (len >= 3) {
989
  /*
990
   * Errata on XML-1.0 June 20 2001
991
   * We now allow an UTF8 encoded BOM
992
   */
993
1.04M
  if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
994
1.04M
      (in[2] == 0xBF))
995
3.60k
      return(XML_CHAR_ENCODING_UTF8);
996
1.04M
    }
997
    /* For UTF-16 we can recognize by the BOM */
998
1.03M
    if (len >= 2) {
999
1.03M
  if ((in[0] == 0xFE) && (in[1] == 0xFF))
1000
3.97k
      return(XML_CHAR_ENCODING_UTF16BE);
1001
1.03M
  if ((in[0] == 0xFF) && (in[1] == 0xFE))
1002
7.94k
      return(XML_CHAR_ENCODING_UTF16LE);
1003
1.03M
    }
1004
1.02M
    return(XML_CHAR_ENCODING_NONE);
1005
1.03M
}
1006
1007
/**
1008
 * xmlCleanupEncodingAliases:
1009
 *
1010
 * Unregisters all aliases
1011
 */
1012
void
1013
0
xmlCleanupEncodingAliases(void) {
1014
0
    int i;
1015
1016
0
    if (xmlCharEncodingAliases == NULL)
1017
0
  return;
1018
1019
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1020
0
  if (xmlCharEncodingAliases[i].name != NULL)
1021
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1022
0
  if (xmlCharEncodingAliases[i].alias != NULL)
1023
0
      xmlFree((char *) xmlCharEncodingAliases[i].alias);
1024
0
    }
1025
0
    xmlCharEncodingAliasesNb = 0;
1026
0
    xmlCharEncodingAliasesMax = 0;
1027
0
    xmlFree(xmlCharEncodingAliases);
1028
0
    xmlCharEncodingAliases = NULL;
1029
0
}
1030
1031
/**
1032
 * xmlGetEncodingAlias:
1033
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1034
 *
1035
 * Lookup an encoding name for the given alias.
1036
 *
1037
 * Returns NULL if not found, otherwise the original name
1038
 */
1039
const char *
1040
154k
xmlGetEncodingAlias(const char *alias) {
1041
154k
    int i;
1042
154k
    char upper[100];
1043
1044
154k
    if (alias == NULL)
1045
0
  return(NULL);
1046
1047
154k
    if (xmlCharEncodingAliases == NULL)
1048
154k
  return(NULL);
1049
1050
0
    for (i = 0;i < 99;i++) {
1051
0
        upper[i] = toupper(alias[i]);
1052
0
  if (upper[i] == 0) break;
1053
0
    }
1054
0
    upper[i] = 0;
1055
1056
    /*
1057
     * Walk down the list looking for a definition of the alias
1058
     */
1059
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1060
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1061
0
      return(xmlCharEncodingAliases[i].name);
1062
0
  }
1063
0
    }
1064
0
    return(NULL);
1065
0
}
1066
1067
/**
1068
 * xmlAddEncodingAlias:
1069
 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1070
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1071
 *
1072
 * Registers an alias @alias for an encoding named @name. Existing alias
1073
 * will be overwritten.
1074
 *
1075
 * Returns 0 in case of success, -1 in case of error
1076
 */
1077
int
1078
0
xmlAddEncodingAlias(const char *name, const char *alias) {
1079
0
    int i;
1080
0
    char upper[100];
1081
1082
0
    if ((name == NULL) || (alias == NULL))
1083
0
  return(-1);
1084
1085
0
    for (i = 0;i < 99;i++) {
1086
0
        upper[i] = toupper(alias[i]);
1087
0
  if (upper[i] == 0) break;
1088
0
    }
1089
0
    upper[i] = 0;
1090
1091
0
    if (xmlCharEncodingAliases == NULL) {
1092
0
  xmlCharEncodingAliasesNb = 0;
1093
0
  xmlCharEncodingAliasesMax = 20;
1094
0
  xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1095
0
        xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1096
0
  if (xmlCharEncodingAliases == NULL)
1097
0
      return(-1);
1098
0
    } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1099
0
  xmlCharEncodingAliasesMax *= 2;
1100
0
  xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1101
0
        xmlRealloc(xmlCharEncodingAliases,
1102
0
             xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1103
0
    }
1104
    /*
1105
     * Walk down the list looking for a definition of the alias
1106
     */
1107
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1108
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1109
      /*
1110
       * Replace the definition.
1111
       */
1112
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1113
0
      xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1114
0
      return(0);
1115
0
  }
1116
0
    }
1117
    /*
1118
     * Add the definition
1119
     */
1120
0
    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1121
0
    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1122
0
    xmlCharEncodingAliasesNb++;
1123
0
    return(0);
1124
0
}
1125
1126
/**
1127
 * xmlDelEncodingAlias:
1128
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1129
 *
1130
 * Unregisters an encoding alias @alias
1131
 *
1132
 * Returns 0 in case of success, -1 in case of error
1133
 */
1134
int
1135
0
xmlDelEncodingAlias(const char *alias) {
1136
0
    int i;
1137
1138
0
    if (alias == NULL)
1139
0
  return(-1);
1140
1141
0
    if (xmlCharEncodingAliases == NULL)
1142
0
  return(-1);
1143
    /*
1144
     * Walk down the list looking for a definition of the alias
1145
     */
1146
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1147
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1148
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1149
0
      xmlFree((char *) xmlCharEncodingAliases[i].alias);
1150
0
      xmlCharEncodingAliasesNb--;
1151
0
      memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1152
0
        sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1153
0
      return(0);
1154
0
  }
1155
0
    }
1156
0
    return(-1);
1157
0
}
1158
1159
/**
1160
 * xmlParseCharEncoding:
1161
 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1162
 *
1163
 * Compare the string to the encoding schemes already known. Note
1164
 * that the comparison is case insensitive accordingly to the section
1165
 * [XML] 4.3.3 Character Encoding in Entities.
1166
 *
1167
 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1168
 * if not recognized.
1169
 */
1170
xmlCharEncoding
1171
xmlParseCharEncoding(const char* name)
1172
183k
{
1173
183k
    const char *alias;
1174
183k
    char upper[500];
1175
183k
    int i;
1176
1177
183k
    if (name == NULL)
1178
146k
  return(XML_CHAR_ENCODING_NONE);
1179
1180
    /*
1181
     * Do the alias resolution
1182
     */
1183
36.6k
    alias = xmlGetEncodingAlias(name);
1184
36.6k
    if (alias != NULL)
1185
0
  name = alias;
1186
1187
310k
    for (i = 0;i < 499;i++) {
1188
310k
        upper[i] = toupper(name[i]);
1189
310k
  if (upper[i] == 0) break;
1190
310k
    }
1191
36.6k
    upper[i] = 0;
1192
1193
36.6k
    if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1194
36.5k
    if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1195
30.3k
    if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1196
1197
    /*
1198
     * NOTE: if we were able to parse this, the endianness of UTF16 is
1199
     *       already found and in use
1200
     */
1201
30.2k
    if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1202
30.2k
    if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1203
1204
30.2k
    if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1205
30.2k
    if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1206
30.2k
    if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1207
1208
    /*
1209
     * NOTE: if we were able to parse this, the endianness of UCS4 is
1210
     *       already found and in use
1211
     */
1212
30.2k
    if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1213
28.2k
    if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1214
28.2k
    if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1215
1216
1217
28.2k
    if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1218
22.2k
    if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1219
22.2k
    if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1220
1221
22.2k
    if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1222
22.1k
    if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1223
22.1k
    if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1224
1225
22.1k
    if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1226
22.0k
    if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1227
21.9k
    if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1228
20.9k
    if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1229
20.8k
    if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1230
20.7k
    if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1231
20.6k
    if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1232
1233
20.5k
    if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1234
20.5k
    if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1235
20.5k
    if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1236
1237
#ifdef DEBUG_ENCODING
1238
    xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1239
#endif
1240
20.4k
    return(XML_CHAR_ENCODING_ERROR);
1241
20.5k
}
1242
1243
/**
1244
 * xmlGetCharEncodingName:
1245
 * @enc:  the encoding
1246
 *
1247
 * The "canonical" name for XML encoding.
1248
 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1249
 * Section 4.3.3  Character Encoding in Entities
1250
 *
1251
 * Returns the canonical name for the given encoding
1252
 */
1253
1254
const char*
1255
2.29k
xmlGetCharEncodingName(xmlCharEncoding enc) {
1256
2.29k
    switch (enc) {
1257
0
        case XML_CHAR_ENCODING_ERROR:
1258
0
      return(NULL);
1259
0
        case XML_CHAR_ENCODING_NONE:
1260
0
      return(NULL);
1261
0
        case XML_CHAR_ENCODING_UTF8:
1262
0
      return("UTF-8");
1263
0
        case XML_CHAR_ENCODING_UTF16LE:
1264
0
      return("UTF-16");
1265
0
        case XML_CHAR_ENCODING_UTF16BE:
1266
0
      return("UTF-16");
1267
0
        case XML_CHAR_ENCODING_EBCDIC:
1268
0
            return("EBCDIC");
1269
2.02k
        case XML_CHAR_ENCODING_UCS4LE:
1270
2.02k
            return("ISO-10646-UCS-4");
1271
0
        case XML_CHAR_ENCODING_UCS4BE:
1272
0
            return("ISO-10646-UCS-4");
1273
114
        case XML_CHAR_ENCODING_UCS4_2143:
1274
114
            return("ISO-10646-UCS-4");
1275
156
        case XML_CHAR_ENCODING_UCS4_3412:
1276
156
            return("ISO-10646-UCS-4");
1277
0
        case XML_CHAR_ENCODING_UCS2:
1278
0
            return("ISO-10646-UCS-2");
1279
0
        case XML_CHAR_ENCODING_8859_1:
1280
0
      return("ISO-8859-1");
1281
0
        case XML_CHAR_ENCODING_8859_2:
1282
0
      return("ISO-8859-2");
1283
0
        case XML_CHAR_ENCODING_8859_3:
1284
0
      return("ISO-8859-3");
1285
0
        case XML_CHAR_ENCODING_8859_4:
1286
0
      return("ISO-8859-4");
1287
0
        case XML_CHAR_ENCODING_8859_5:
1288
0
      return("ISO-8859-5");
1289
0
        case XML_CHAR_ENCODING_8859_6:
1290
0
      return("ISO-8859-6");
1291
0
        case XML_CHAR_ENCODING_8859_7:
1292
0
      return("ISO-8859-7");
1293
0
        case XML_CHAR_ENCODING_8859_8:
1294
0
      return("ISO-8859-8");
1295
0
        case XML_CHAR_ENCODING_8859_9:
1296
0
      return("ISO-8859-9");
1297
0
        case XML_CHAR_ENCODING_2022_JP:
1298
0
            return("ISO-2022-JP");
1299
0
        case XML_CHAR_ENCODING_SHIFT_JIS:
1300
0
            return("Shift-JIS");
1301
0
        case XML_CHAR_ENCODING_EUC_JP:
1302
0
            return("EUC-JP");
1303
0
  case XML_CHAR_ENCODING_ASCII:
1304
0
      return(NULL);
1305
2.29k
    }
1306
0
    return(NULL);
1307
2.29k
}
1308
1309
/************************************************************************
1310
 *                  *
1311
 *      Char encoding handlers        *
1312
 *                  *
1313
 ************************************************************************/
1314
1315
1316
/* the size should be growable, but it's not a big deal ... */
1317
35.2k
#define MAX_ENCODING_HANDLERS 50
1318
static xmlCharEncodingHandlerPtr *handlers = NULL;
1319
static int nbCharEncodingHandler = 0;
1320
1321
/*
1322
 * The default is UTF-8 for XML, that's also the default used for the
1323
 * parser internals, so the default encoding handler is NULL
1324
 */
1325
1326
static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1327
1328
/**
1329
 * xmlNewCharEncodingHandler:
1330
 * @name:  the encoding name, in UTF-8 format (ASCII actually)
1331
 * @input:  the xmlCharEncodingInputFunc to read that encoding
1332
 * @output:  the xmlCharEncodingOutputFunc to write that encoding
1333
 *
1334
 * Create and registers an xmlCharEncodingHandler.
1335
 *
1336
 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1337
 */
1338
xmlCharEncodingHandlerPtr
1339
xmlNewCharEncodingHandler(const char *name,
1340
                          xmlCharEncodingInputFunc input,
1341
31.3k
                          xmlCharEncodingOutputFunc output) {
1342
31.3k
    xmlCharEncodingHandlerPtr handler;
1343
31.3k
    const char *alias;
1344
31.3k
    char upper[500];
1345
31.3k
    int i;
1346
31.3k
    char *up = NULL;
1347
1348
    /*
1349
     * Do the alias resolution
1350
     */
1351
31.3k
    alias = xmlGetEncodingAlias(name);
1352
31.3k
    if (alias != NULL)
1353
0
  name = alias;
1354
1355
    /*
1356
     * Keep only the uppercase version of the encoding.
1357
     */
1358
31.3k
    if (name == NULL) {
1359
0
        xmlEncodingErr(XML_I18N_NO_NAME,
1360
0
           "xmlNewCharEncodingHandler : no name !\n", NULL);
1361
0
  return(NULL);
1362
0
    }
1363
242k
    for (i = 0;i < 499;i++) {
1364
242k
        upper[i] = toupper(name[i]);
1365
242k
  if (upper[i] == 0) break;
1366
242k
    }
1367
31.3k
    upper[i] = 0;
1368
31.3k
    up = xmlMemStrdup(upper);
1369
31.3k
    if (up == NULL) {
1370
0
        xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1371
0
  return(NULL);
1372
0
    }
1373
1374
    /*
1375
     * allocate and fill-up an handler block.
1376
     */
1377
31.3k
    handler = (xmlCharEncodingHandlerPtr)
1378
31.3k
              xmlMalloc(sizeof(xmlCharEncodingHandler));
1379
31.3k
    if (handler == NULL) {
1380
0
        xmlFree(up);
1381
0
        xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1382
0
  return(NULL);
1383
0
    }
1384
31.3k
    memset(handler, 0, sizeof(xmlCharEncodingHandler));
1385
31.3k
    handler->input = input;
1386
31.3k
    handler->output = output;
1387
31.3k
    handler->name = up;
1388
1389
31.3k
#ifdef LIBXML_ICONV_ENABLED
1390
31.3k
    handler->iconv_in = NULL;
1391
31.3k
    handler->iconv_out = NULL;
1392
31.3k
#endif
1393
#ifdef LIBXML_ICU_ENABLED
1394
    handler->uconv_in = NULL;
1395
    handler->uconv_out = NULL;
1396
#endif
1397
1398
    /*
1399
     * registers and returns the handler.
1400
     */
1401
31.3k
    xmlRegisterCharEncodingHandler(handler);
1402
#ifdef DEBUG_ENCODING
1403
    xmlGenericError(xmlGenericErrorContext,
1404
      "Registered encoding handler for %s\n", name);
1405
#endif
1406
31.3k
    return(handler);
1407
31.3k
}
1408
1409
/**
1410
 * xmlInitCharEncodingHandlers:
1411
 *
1412
 * DEPRECATED: This function will be made private. Call xmlInitParser to
1413
 * initialize the library.
1414
 *
1415
 * Initialize the char encoding support, it registers the default
1416
 * encoding supported.
1417
 * NOTE: while public, this function usually doesn't need to be called
1418
 *       in normal processing.
1419
 */
1420
void
1421
3.91k
xmlInitCharEncodingHandlers(void) {
1422
3.91k
    unsigned short int tst = 0x1234;
1423
3.91k
    unsigned char *ptr = (unsigned char *) &tst;
1424
1425
3.91k
    if (handlers != NULL) return;
1426
1427
3.91k
    handlers = (xmlCharEncodingHandlerPtr *)
1428
3.91k
        xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1429
1430
3.91k
    if (*ptr == 0x12) xmlLittleEndian = 0;
1431
3.91k
    else if (*ptr == 0x34) xmlLittleEndian = 1;
1432
0
    else {
1433
0
        xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1434
0
                 "Odd problem at endianness detection\n", NULL);
1435
0
    }
1436
1437
3.91k
    if (handlers == NULL) {
1438
0
        xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
1439
0
  return;
1440
0
    }
1441
3.91k
    xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
1442
3.91k
#ifdef LIBXML_OUTPUT_ENABLED
1443
3.91k
    xmlUTF16LEHandler =
1444
3.91k
          xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1445
3.91k
    xmlUTF16BEHandler =
1446
3.91k
          xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
1447
3.91k
    xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
1448
3.91k
    xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1449
3.91k
    xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
1450
3.91k
    xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
1451
3.91k
#ifdef LIBXML_HTML_ENABLED
1452
3.91k
    xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1453
3.91k
#endif
1454
#else
1455
    xmlUTF16LEHandler =
1456
          xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1457
    xmlUTF16BEHandler =
1458
          xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
1459
    xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
1460
    xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1461
    xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1462
    xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1463
#endif /* LIBXML_OUTPUT_ENABLED */
1464
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
1465
#ifdef LIBXML_ISO8859X_ENABLED
1466
    xmlRegisterCharEncodingHandlersISO8859x ();
1467
#endif
1468
#endif
1469
1470
3.91k
}
1471
1472
/**
1473
 * xmlCleanupCharEncodingHandlers:
1474
 *
1475
 * DEPRECATED: This function will be made private. Call xmlCleanupParser
1476
 * to free global state but see the warnings there. xmlCleanupParser
1477
 * should be only called once at program exit. In most cases, you don't
1478
 * have call cleanup functions at all.
1479
 *
1480
 * Cleanup the memory allocated for the char encoding support, it
1481
 * unregisters all the encoding handlers and the aliases.
1482
 */
1483
void
1484
0
xmlCleanupCharEncodingHandlers(void) {
1485
0
    xmlCleanupEncodingAliases();
1486
1487
0
    if (handlers == NULL) return;
1488
1489
0
    for (;nbCharEncodingHandler > 0;) {
1490
0
        nbCharEncodingHandler--;
1491
0
  if (handlers[nbCharEncodingHandler] != NULL) {
1492
0
      if (handlers[nbCharEncodingHandler]->name != NULL)
1493
0
    xmlFree(handlers[nbCharEncodingHandler]->name);
1494
0
      xmlFree(handlers[nbCharEncodingHandler]);
1495
0
  }
1496
0
    }
1497
0
    xmlFree(handlers);
1498
0
    handlers = NULL;
1499
0
    nbCharEncodingHandler = 0;
1500
0
    xmlDefaultCharEncodingHandler = NULL;
1501
0
}
1502
1503
/**
1504
 * xmlRegisterCharEncodingHandler:
1505
 * @handler:  the xmlCharEncodingHandlerPtr handler block
1506
 *
1507
 * Register the char encoding handler, surprising, isn't it ?
1508
 */
1509
void
1510
31.3k
xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1511
31.3k
    if (handlers == NULL) xmlInitCharEncodingHandlers();
1512
31.3k
    if ((handler == NULL) || (handlers == NULL)) {
1513
0
        xmlEncodingErr(XML_I18N_NO_HANDLER,
1514
0
    "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
1515
0
        goto free_handler;
1516
0
    }
1517
1518
31.3k
    if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1519
0
        xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1520
0
  "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1521
0
                 "MAX_ENCODING_HANDLERS");
1522
0
        goto free_handler;
1523
0
    }
1524
31.3k
    handlers[nbCharEncodingHandler++] = handler;
1525
31.3k
    return;
1526
1527
0
free_handler:
1528
0
    if (handler != NULL) {
1529
0
        if (handler->name != NULL) {
1530
0
            xmlFree(handler->name);
1531
0
        }
1532
0
        xmlFree(handler);
1533
0
    }
1534
0
}
1535
1536
/**
1537
 * xmlGetCharEncodingHandler:
1538
 * @enc:  an xmlCharEncoding value.
1539
 *
1540
 * Search in the registered set the handler able to read/write that encoding.
1541
 *
1542
 * Returns the handler or NULL if not found
1543
 */
1544
xmlCharEncodingHandlerPtr
1545
2.00M
xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1546
2.00M
    xmlCharEncodingHandlerPtr handler;
1547
1548
2.00M
    if (handlers == NULL) xmlInitCharEncodingHandlers();
1549
2.00M
    switch (enc) {
1550
0
        case XML_CHAR_ENCODING_ERROR:
1551
0
      return(NULL);
1552
1.86M
        case XML_CHAR_ENCODING_NONE:
1553
1.86M
      return(NULL);
1554
109k
        case XML_CHAR_ENCODING_UTF8:
1555
109k
      return(NULL);
1556
11.6k
        case XML_CHAR_ENCODING_UTF16LE:
1557
11.6k
      return(xmlUTF16LEHandler);
1558
5.94k
        case XML_CHAR_ENCODING_UTF16BE:
1559
5.94k
      return(xmlUTF16BEHandler);
1560
9.34k
        case XML_CHAR_ENCODING_EBCDIC:
1561
9.34k
            handler = xmlFindCharEncodingHandler("EBCDIC");
1562
9.34k
            if (handler != NULL) return(handler);
1563
9.34k
            handler = xmlFindCharEncodingHandler("ebcdic");
1564
9.34k
            if (handler != NULL) return(handler);
1565
9.34k
            handler = xmlFindCharEncodingHandler("EBCDIC-US");
1566
9.34k
            if (handler != NULL) return(handler);
1567
0
            handler = xmlFindCharEncodingHandler("IBM-037");
1568
0
            if (handler != NULL) return(handler);
1569
0
      break;
1570
587
        case XML_CHAR_ENCODING_UCS4BE:
1571
587
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1572
587
            if (handler != NULL) return(handler);
1573
587
            handler = xmlFindCharEncodingHandler("UCS-4");
1574
587
            if (handler != NULL) return(handler);
1575
0
            handler = xmlFindCharEncodingHandler("UCS4");
1576
0
            if (handler != NULL) return(handler);
1577
0
      break;
1578
1.43k
        case XML_CHAR_ENCODING_UCS4LE:
1579
1.43k
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1580
1.43k
            if (handler != NULL) return(handler);
1581
1.43k
            handler = xmlFindCharEncodingHandler("UCS-4");
1582
1.43k
            if (handler != NULL) return(handler);
1583
0
            handler = xmlFindCharEncodingHandler("UCS4");
1584
0
            if (handler != NULL) return(handler);
1585
0
      break;
1586
150
        case XML_CHAR_ENCODING_UCS4_2143:
1587
150
      break;
1588
203
        case XML_CHAR_ENCODING_UCS4_3412:
1589
203
      break;
1590
0
        case XML_CHAR_ENCODING_UCS2:
1591
0
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1592
0
            if (handler != NULL) return(handler);
1593
0
            handler = xmlFindCharEncodingHandler("UCS-2");
1594
0
            if (handler != NULL) return(handler);
1595
0
            handler = xmlFindCharEncodingHandler("UCS2");
1596
0
            if (handler != NULL) return(handler);
1597
0
      break;
1598
1599
      /*
1600
       * We used to keep ISO Latin encodings native in the
1601
       * generated data. This led to so many problems that
1602
       * this has been removed. One can still change this
1603
       * back by registering no-ops encoders for those
1604
       */
1605
192
        case XML_CHAR_ENCODING_8859_1:
1606
192
      handler = xmlFindCharEncodingHandler("ISO-8859-1");
1607
192
      if (handler != NULL) return(handler);
1608
0
      break;
1609
50
        case XML_CHAR_ENCODING_8859_2:
1610
50
      handler = xmlFindCharEncodingHandler("ISO-8859-2");
1611
50
      if (handler != NULL) return(handler);
1612
0
      break;
1613
52
        case XML_CHAR_ENCODING_8859_3:
1614
52
      handler = xmlFindCharEncodingHandler("ISO-8859-3");
1615
52
      if (handler != NULL) return(handler);
1616
0
      break;
1617
40
        case XML_CHAR_ENCODING_8859_4:
1618
40
      handler = xmlFindCharEncodingHandler("ISO-8859-4");
1619
40
      if (handler != NULL) return(handler);
1620
0
      break;
1621
55
        case XML_CHAR_ENCODING_8859_5:
1622
55
      handler = xmlFindCharEncodingHandler("ISO-8859-5");
1623
55
      if (handler != NULL) return(handler);
1624
0
      break;
1625
45
        case XML_CHAR_ENCODING_8859_6:
1626
45
      handler = xmlFindCharEncodingHandler("ISO-8859-6");
1627
45
      if (handler != NULL) return(handler);
1628
0
      break;
1629
51
        case XML_CHAR_ENCODING_8859_7:
1630
51
      handler = xmlFindCharEncodingHandler("ISO-8859-7");
1631
51
      if (handler != NULL) return(handler);
1632
0
      break;
1633
41
        case XML_CHAR_ENCODING_8859_8:
1634
41
      handler = xmlFindCharEncodingHandler("ISO-8859-8");
1635
41
      if (handler != NULL) return(handler);
1636
0
      break;
1637
54
        case XML_CHAR_ENCODING_8859_9:
1638
54
      handler = xmlFindCharEncodingHandler("ISO-8859-9");
1639
54
      if (handler != NULL) return(handler);
1640
0
      break;
1641
1642
1643
0
        case XML_CHAR_ENCODING_2022_JP:
1644
0
            handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1645
0
            if (handler != NULL) return(handler);
1646
0
      break;
1647
0
        case XML_CHAR_ENCODING_SHIFT_JIS:
1648
0
            handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1649
0
            if (handler != NULL) return(handler);
1650
0
            handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1651
0
            if (handler != NULL) return(handler);
1652
0
            handler = xmlFindCharEncodingHandler("Shift_JIS");
1653
0
            if (handler != NULL) return(handler);
1654
0
      break;
1655
0
        case XML_CHAR_ENCODING_EUC_JP:
1656
0
            handler = xmlFindCharEncodingHandler("EUC-JP");
1657
0
            if (handler != NULL) return(handler);
1658
0
      break;
1659
0
  default:
1660
0
      break;
1661
2.00M
    }
1662
1663
#ifdef DEBUG_ENCODING
1664
    xmlGenericError(xmlGenericErrorContext,
1665
      "No handler found for encoding %d\n", enc);
1666
#endif
1667
353
    return(NULL);
1668
2.00M
}
1669
1670
/**
1671
 * xmlFindCharEncodingHandler:
1672
 * @name:  a string describing the char encoding.
1673
 *
1674
 * Search in the registered set the handler able to read/write that encoding.
1675
 *
1676
 * Returns the handler or NULL if not found
1677
 */
1678
xmlCharEncodingHandlerPtr
1679
86.0k
xmlFindCharEncodingHandler(const char *name) {
1680
86.0k
    const char *nalias;
1681
86.0k
    const char *norig;
1682
86.0k
    xmlCharEncoding alias;
1683
86.0k
#ifdef LIBXML_ICONV_ENABLED
1684
86.0k
    xmlCharEncodingHandlerPtr enc;
1685
86.0k
    iconv_t icv_in, icv_out;
1686
86.0k
#endif /* LIBXML_ICONV_ENABLED */
1687
#ifdef LIBXML_ICU_ENABLED
1688
    xmlCharEncodingHandlerPtr encu;
1689
    uconv_t *ucv_in, *ucv_out;
1690
#endif /* LIBXML_ICU_ENABLED */
1691
86.0k
    char upper[100];
1692
86.0k
    int i;
1693
1694
86.0k
    if (handlers == NULL) xmlInitCharEncodingHandlers();
1695
86.0k
    if (name == NULL) return(xmlDefaultCharEncodingHandler);
1696
86.0k
    if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1697
1698
    /*
1699
     * Do the alias resolution
1700
     */
1701
86.0k
    norig = name;
1702
86.0k
    nalias = xmlGetEncodingAlias(name);
1703
86.0k
    if (nalias != NULL)
1704
0
  name = nalias;
1705
1706
    /*
1707
     * Check first for directly registered encoding names
1708
     */
1709
820k
    for (i = 0;i < 99;i++) {
1710
820k
        upper[i] = toupper(name[i]);
1711
820k
  if (upper[i] == 0) break;
1712
820k
    }
1713
86.0k
    upper[i] = 0;
1714
1715
86.0k
    if (handlers != NULL) {
1716
585k
        for (i = 0;i < nbCharEncodingHandler; i++) {
1717
542k
            if (!strcmp(upper, handlers[i]->name)) {
1718
#ifdef DEBUG_ENCODING
1719
                xmlGenericError(xmlGenericErrorContext,
1720
                        "Found registered handler for encoding %s\n", name);
1721
#endif
1722
43.1k
                return(handlers[i]);
1723
43.1k
            }
1724
542k
        }
1725
86.0k
    }
1726
1727
42.9k
#ifdef LIBXML_ICONV_ENABLED
1728
    /* check whether iconv can handle this */
1729
42.9k
    icv_in = iconv_open("UTF-8", name);
1730
42.9k
    icv_out = iconv_open(name, "UTF-8");
1731
42.9k
    if (icv_in == (iconv_t) -1) {
1732
21.6k
        icv_in = iconv_open("UTF-8", upper);
1733
21.6k
    }
1734
42.9k
    if (icv_out == (iconv_t) -1) {
1735
21.6k
  icv_out = iconv_open(upper, "UTF-8");
1736
21.6k
    }
1737
42.9k
    if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1738
21.3k
      enc = (xmlCharEncodingHandlerPtr)
1739
21.3k
            xmlMalloc(sizeof(xmlCharEncodingHandler));
1740
21.3k
      if (enc == NULL) {
1741
0
          iconv_close(icv_in);
1742
0
          iconv_close(icv_out);
1743
0
    return(NULL);
1744
0
      }
1745
21.3k
            memset(enc, 0, sizeof(xmlCharEncodingHandler));
1746
21.3k
      enc->name = xmlMemStrdup(name);
1747
21.3k
      enc->input = NULL;
1748
21.3k
      enc->output = NULL;
1749
21.3k
      enc->iconv_in = icv_in;
1750
21.3k
      enc->iconv_out = icv_out;
1751
#ifdef DEBUG_ENCODING
1752
            xmlGenericError(xmlGenericErrorContext,
1753
        "Found iconv handler for encoding %s\n", name);
1754
#endif
1755
21.3k
      return enc;
1756
21.6k
    } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1757
0
      xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1758
0
        "iconv : problems with filters for '%s'\n", name);
1759
0
      if (icv_in != (iconv_t) -1)
1760
0
    iconv_close(icv_in);
1761
0
      else
1762
0
    iconv_close(icv_out);
1763
0
    }
1764
21.6k
#endif /* LIBXML_ICONV_ENABLED */
1765
#ifdef LIBXML_ICU_ENABLED
1766
    /* check whether icu can handle this */
1767
    ucv_in = openIcuConverter(name, 1);
1768
    ucv_out = openIcuConverter(name, 0);
1769
    if (ucv_in != NULL && ucv_out != NULL) {
1770
      encu = (xmlCharEncodingHandlerPtr)
1771
             xmlMalloc(sizeof(xmlCharEncodingHandler));
1772
      if (encu == NULL) {
1773
                closeIcuConverter(ucv_in);
1774
                closeIcuConverter(ucv_out);
1775
    return(NULL);
1776
      }
1777
            memset(encu, 0, sizeof(xmlCharEncodingHandler));
1778
      encu->name = xmlMemStrdup(name);
1779
      encu->input = NULL;
1780
      encu->output = NULL;
1781
      encu->uconv_in = ucv_in;
1782
      encu->uconv_out = ucv_out;
1783
#ifdef DEBUG_ENCODING
1784
            xmlGenericError(xmlGenericErrorContext,
1785
        "Found ICU converter handler for encoding %s\n", name);
1786
#endif
1787
      return encu;
1788
    } else if (ucv_in != NULL || ucv_out != NULL) {
1789
            closeIcuConverter(ucv_in);
1790
            closeIcuConverter(ucv_out);
1791
      xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1792
        "ICU converter : problems with filters for '%s'\n", name);
1793
    }
1794
#endif /* LIBXML_ICU_ENABLED */
1795
1796
#ifdef DEBUG_ENCODING
1797
    xmlGenericError(xmlGenericErrorContext,
1798
      "No handler found for encoding %s\n", name);
1799
#endif
1800
1801
    /*
1802
     * Fallback using the canonical names
1803
     */
1804
21.6k
    alias = xmlParseCharEncoding(norig);
1805
21.6k
    if (alias != XML_CHAR_ENCODING_ERROR) {
1806
2.02k
        const char* canon;
1807
2.02k
        canon = xmlGetCharEncodingName(alias);
1808
2.02k
        if ((canon != NULL) && (strcmp(name, canon))) {
1809
0
      return(xmlFindCharEncodingHandler(canon));
1810
0
        }
1811
2.02k
    }
1812
1813
    /* If "none of the above", give up */
1814
21.6k
    return(NULL);
1815
21.6k
}
1816
1817
/************************************************************************
1818
 *                  *
1819
 *    ICONV based generic conversion functions    *
1820
 *                  *
1821
 ************************************************************************/
1822
1823
#ifdef LIBXML_ICONV_ENABLED
1824
/**
 * xmlIconvWrapper:
 * @cd:  iconv conversion descriptor
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  on entry the capacity of @out, on return the octets produced
 * @in:  a pointer to an array of input bytes
 * @inlen:  on entry the length of @in, on return the octets consumed
 *
 * Performs one iconv() call over the supplied buffers and maps the
 * resulting errno onto the return codes shared by the conversion
 * front-ends of this file.
 *
 * Returns 0 if success, or
 *     -1 by lack of space (errno is E2BIG) or if one of the pointer
 *        arguments is NULL (in which case *@outlen is set to 0), or
 *     -2 if the transcoding fails (errno is EILSEQ), or
 *     -3 for every other iconv failure, EINVAL included.
 */
static int
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    const char *src;
    char *dst;
    size_t srcLeft;
    size_t dstLeft;
    size_t rc;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
        if (outlen != NULL)
            *outlen = 0;
        return(-1);
    }

    src = (const char *) in;
    dst = (char *) out;
    srcLeft = *inlen;
    dstLeft = *outlen;

    /*
     * Some versions take const, other versions take non-const input,
     * hence the cast through void.
     */
    rc = iconv(cd, (void *) &src, &srcLeft, &dst, &dstLeft);

    /* iconv leaves the unconsumed / unused byte counts in the lengths. */
    *inlen -= srcLeft;
    *outlen -= dstLeft;

    if ((srcLeft == 0) && (rc != (size_t) -1))
        return(0);

#ifdef EILSEQ
    if (errno == EILSEQ)
        return(-2);
#endif
#ifdef E2BIG
    if (errno == E2BIG)
        return(-1);
#endif
    /* EINVAL and any unrecognized errno share the same code. */
    return(-3);
}
1884
#endif /* LIBXML_ICONV_ENABLED */
1885
1886
/************************************************************************
1887
 *                  *
1888
 *    ICU based generic conversion functions    *
1889
 *                  *
1890
 ************************************************************************/
1891
1892
#ifdef LIBXML_ICU_ENABLED
1893
/**
1894
 * xmlUconvWrapper:
1895
 * @cd: ICU uconverter data structure
1896
 * @toUnicode : non-zero if toUnicode. 0 otherwise.
1897
 * @out:  a pointer to an array of bytes to store the result
1898
 * @outlen:  the length of @out
1899
 * @in:  a pointer to an array of input bytes
1900
 * @inlen:  the length of @in
1901
 * @flush: if true, indicates end of input
1902
 *
1903
 * Returns 0 if success, or
1904
 *     -1 by lack of space, or
1905
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
1906
 *        the result of transformation can't fit into the encoding we want), or
1907
 *     -3 if there the last byte can't form a single output char.
1908
 *
1909
 * The value of @inlen after return is the number of octets consumed
1910
 *     as the return value is positive, else unpredictable.
1911
 * The value of @outlen after return is the number of octets produced.
1912
 */
1913
static int
1914
xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
1915
                const unsigned char *in, int *inlen, int flush) {
1916
    const char *ucv_in = (const char *) in;
1917
    char *ucv_out = (char *) out;
1918
    UErrorCode err = U_ZERO_ERROR;
1919
1920
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1921
        if (outlen != NULL) *outlen = 0;
1922
        return(-1);
1923
    }
1924
1925
    if (toUnicode) {
1926
        /* encoding => UTF-16 => UTF-8 */
1927
        ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
1928
                       &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1929
                       &cd->pivot_source, &cd->pivot_target,
1930
                       cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1931
    } else {
1932
        /* UTF-8 => UTF-16 => encoding */
1933
        ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
1934
                       &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1935
                       &cd->pivot_source, &cd->pivot_target,
1936
                       cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1937
    }
1938
    *inlen = ucv_in - (const char*) in;
1939
    *outlen = ucv_out - (char *) out;
1940
    if (U_SUCCESS(err)) {
1941
        /* reset pivot buf if this is the last call for input (flush==TRUE) */
1942
        if (flush)
1943
            cd->pivot_source = cd->pivot_target = cd->pivot_buf;
1944
        return 0;
1945
    }
1946
    if (err == U_BUFFER_OVERFLOW_ERROR)
1947
        return -1;
1948
    if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
1949
        return -2;
1950
    return -3;
1951
}
1952
#endif /* LIBXML_ICU_ENABLED */
1953
1954
/************************************************************************
1955
 *                  *
1956
 *    The real API used by libxml for on-the-fly conversion *
1957
 *                  *
1958
 ************************************************************************/
1959
1960
/**
1961
 * xmlEncInputChunk:
1962
 * @handler:  encoding handler
1963
 * @out:  a pointer to an array of bytes to store the result
1964
 * @outlen:  the length of @out
1965
 * @in:  a pointer to an array of input bytes
1966
 * @inlen:  the length of @in
1967
 * @flush:  flush (ICU-related)
1968
 *
1969
 * Returns 0 if success, or
1970
 *     -1 by lack of space, or
1971
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
1972
 *        the result of transformation can't fit into the encoding we want), or
1973
 *     -3 if there the last byte can't form a single output char.
1974
 *
1975
 * The value of @inlen after return is the number of octets consumed
1976
 *     as the return value is 0, else unpredictable.
1977
 * The value of @outlen after return is the number of octets produced.
1978
 */
1979
static int
1980
xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
1981
2.44M
                 int *outlen, const unsigned char *in, int *inlen, int flush) {
1982
2.44M
    int ret;
1983
2.44M
    (void)flush;
1984
1985
2.44M
    if (handler->input != NULL) {
1986
2.26M
        ret = handler->input(out, outlen, in, inlen);
1987
2.26M
        if (ret > 0)
1988
2.20M
           ret = 0;
1989
2.26M
    }
1990
172k
#ifdef LIBXML_ICONV_ENABLED
1991
172k
    else if (handler->iconv_in != NULL) {
1992
172k
        ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
1993
172k
    }
1994
36
#endif /* LIBXML_ICONV_ENABLED */
1995
#ifdef LIBXML_ICU_ENABLED
1996
    else if (handler->uconv_in != NULL) {
1997
        ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen,
1998
                              flush);
1999
    }
2000
#endif /* LIBXML_ICU_ENABLED */
2001
36
    else {
2002
36
        *outlen = 0;
2003
36
        *inlen = 0;
2004
36
        ret = -2;
2005
36
    }
2006
2007
2.44M
    return(ret);
2008
2.44M
}
2009
2010
/**
2011
 * xmlEncOutputChunk:
2012
 * @handler:  encoding handler
2013
 * @out:  a pointer to an array of bytes to store the result
2014
 * @outlen:  the length of @out
2015
 * @in:  a pointer to an array of input bytes
2016
 * @inlen:  the length of @in
2017
 *
2018
 * Returns 0 if success, or
2019
 *     -1 by lack of space, or
2020
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2021
 *        the result of transformation can't fit into the encoding we want), or
2022
 *     -3 if there the last byte can't form a single output char.
2023
 *     -4 if no output function was found.
2024
 *
2025
 * The value of @inlen after return is the number of octets consumed
2026
 *     as the return value is 0, else unpredictable.
2027
 * The value of @outlen after return is the number of octets produced.
2028
 */
2029
static int
2030
xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2031
99.9k
                  int *outlen, const unsigned char *in, int *inlen) {
2032
99.9k
    int ret;
2033
2034
99.9k
    if (handler->output != NULL) {
2035
94.6k
        ret = handler->output(out, outlen, in, inlen);
2036
94.6k
        if (ret > 0)
2037
80.5k
           ret = 0;
2038
94.6k
    }
2039
5.36k
#ifdef LIBXML_ICONV_ENABLED
2040
5.36k
    else if (handler->iconv_out != NULL) {
2041
5.36k
        ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
2042
5.36k
    }
2043
0
#endif /* LIBXML_ICONV_ENABLED */
2044
#ifdef LIBXML_ICU_ENABLED
2045
    else if (handler->uconv_out != NULL) {
2046
        ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen,
2047
                              1);
2048
    }
2049
#endif /* LIBXML_ICU_ENABLED */
2050
0
    else {
2051
0
        *outlen = 0;
2052
0
        *inlen = 0;
2053
0
        ret = -4;
2054
0
    }
2055
2056
99.9k
    return(ret);
2057
99.9k
}
2058
2059
/**
2060
 * xmlCharEncFirstLine:
2061
 * @handler:  char encoding transformation data structure
2062
 * @out:  an xmlBuffer for the output.
2063
 * @in:  an xmlBuffer for the input
2064
 *
2065
 * Front-end for the encoding handler input function, but handle only
2066
 * the very first line, i.e. limit itself to 45 chars.
2067
 *
2068
 * Returns the number of byte written if success, or
2069
 *     -1 general error
2070
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2071
 *        the result of transformation can't fit into the encoding we want), or
2072
 */
2073
int
2074
xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2075
0
                    xmlBufferPtr in) {
2076
0
    int ret;
2077
0
    int written;
2078
0
    int toconv;
2079
2080
0
    if (handler == NULL) return(-1);
2081
0
    if (out == NULL) return(-1);
2082
0
    if (in == NULL) return(-1);
2083
2084
    /* calculate space available */
2085
0
    written = out->size - out->use - 1; /* count '\0' */
2086
0
    toconv = in->use;
2087
    /*
2088
     * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2089
     * 45 chars should be sufficient to reach the end of the encoding
2090
     * declaration without going too far inside the document content.
2091
     * on UTF-16 this means 90bytes, on UCS4 this means 180
2092
     * The actual value depending on guessed encoding is passed as @len
2093
     * if provided
2094
     */
2095
0
    if (toconv > 180)
2096
0
        toconv = 180;
2097
0
    if (toconv * 2 >= written) {
2098
0
        xmlBufferGrow(out, toconv * 2);
2099
0
  written = out->size - out->use - 1;
2100
0
    }
2101
2102
0
    ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2103
0
                           in->content, &toconv, 0);
2104
0
    xmlBufferShrink(in, toconv);
2105
0
    out->use += written;
2106
0
    out->content[out->use] = 0;
2107
0
    if (ret == -1) ret = -3;
2108
2109
#ifdef DEBUG_ENCODING
2110
    switch (ret) {
2111
        case 0:
2112
      xmlGenericError(xmlGenericErrorContext,
2113
        "converted %d bytes to %d bytes of input\n",
2114
              toconv, written);
2115
      break;
2116
        case -1:
2117
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2118
              toconv, written, in->use);
2119
      break;
2120
        case -2:
2121
      xmlGenericError(xmlGenericErrorContext,
2122
        "input conversion failed due to input error\n");
2123
      break;
2124
        case -3:
2125
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2126
              toconv, written, in->use);
2127
      break;
2128
  default:
2129
      xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2130
    }
2131
#endif /* DEBUG_ENCODING */
2132
    /*
2133
     * Ignore when input buffer is not on a boundary
2134
     */
2135
0
    if (ret == -3) ret = 0;
2136
0
    if (ret == -1) ret = 0;
2137
0
    return(written ? written : ret);
2138
0
}
2139
2140
/**
2141
 * xmlCharEncFirstLineInput:
2142
 * @input: a parser input buffer
2143
 * @len:  number of bytes to convert for the first line, or -1
2144
 *
2145
 * Front-end for the encoding handler input function, but handle only
2146
 * the very first line. Point is that this is based on autodetection
2147
 * of the encoding and once that first line is converted we may find
2148
 * out that a different decoder is needed to process the input.
2149
 *
2150
 * Returns the number of byte written if success, or
2151
 *     -1 general error
2152
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2153
 *        the result of transformation can't fit into the encoding we want), or
2154
 */
2155
int
2156
xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
2157
53.6k
{
2158
53.6k
    int ret;
2159
53.6k
    size_t written;
2160
53.6k
    size_t toconv;
2161
53.6k
    int c_in;
2162
53.6k
    int c_out;
2163
53.6k
    xmlBufPtr in;
2164
53.6k
    xmlBufPtr out;
2165
2166
53.6k
    if ((input == NULL) || (input->encoder == NULL) ||
2167
53.6k
        (input->buffer == NULL) || (input->raw == NULL))
2168
0
        return (-1);
2169
53.6k
    out = input->buffer;
2170
53.6k
    in = input->raw;
2171
2172
53.6k
    toconv = xmlBufUse(in);
2173
53.6k
    if (toconv == 0)
2174
61
        return (0);
2175
53.5k
    written = xmlBufAvail(out);
2176
    /*
2177
     * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2178
     * 45 chars should be sufficient to reach the end of the encoding
2179
     * declaration without going too far inside the document content.
2180
     * on UTF-16 this means 90bytes, on UCS4 this means 180
2181
     * The actual value depending on guessed encoding is passed as @len
2182
     * if provided
2183
     */
2184
53.5k
    if (len >= 0) {
2185
15.1k
        if (toconv > (unsigned int) len)
2186
7.46k
            toconv = len;
2187
38.4k
    } else {
2188
38.4k
        if (toconv > 180)
2189
24.7k
            toconv = 180;
2190
38.4k
    }
2191
53.5k
    if (toconv * 2 >= written) {
2192
0
        xmlBufGrow(out, toconv * 2);
2193
0
        written = xmlBufAvail(out);
2194
0
    }
2195
53.5k
    if (written > 360)
2196
53.5k
        written = 360;
2197
2198
53.5k
    c_in = toconv;
2199
53.5k
    c_out = written;
2200
53.5k
    ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2201
53.5k
                           xmlBufContent(in), &c_in, 0);
2202
53.5k
    xmlBufShrink(in, c_in);
2203
53.5k
    xmlBufAddLen(out, c_out);
2204
53.5k
    if (ret == -1)
2205
851
        ret = -3;
2206
2207
53.5k
    switch (ret) {
2208
47.3k
        case 0:
2209
#ifdef DEBUG_ENCODING
2210
            xmlGenericError(xmlGenericErrorContext,
2211
                            "converted %d bytes to %d bytes of input\n",
2212
                            c_in, c_out);
2213
#endif
2214
47.3k
            break;
2215
0
        case -1:
2216
#ifdef DEBUG_ENCODING
2217
            xmlGenericError(xmlGenericErrorContext,
2218
                         "converted %d bytes to %d bytes of input, %d left\n",
2219
                            c_in, c_out, (int)xmlBufUse(in));
2220
#endif
2221
0
            break;
2222
1.34k
        case -3:
2223
#ifdef DEBUG_ENCODING
2224
            xmlGenericError(xmlGenericErrorContext,
2225
                        "converted %d bytes to %d bytes of input, %d left\n",
2226
                            c_in, c_out, (int)xmlBufUse(in));
2227
#endif
2228
1.34k
            break;
2229
4.91k
        case -2: {
2230
4.91k
            char buf[50];
2231
4.91k
            const xmlChar *content = xmlBufContent(in);
2232
2233
4.91k
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2234
4.91k
         content[0], content[1],
2235
4.91k
         content[2], content[3]);
2236
4.91k
      buf[49] = 0;
2237
4.91k
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2238
4.91k
        "input conversion failed due to input error, bytes %s\n",
2239
4.91k
               buf);
2240
4.91k
        }
2241
53.5k
    }
2242
    /*
2243
     * Ignore when input buffer is not on a boundary
2244
     */
2245
53.5k
    if (ret == -3) ret = 0;
2246
53.5k
    if (ret == -1) ret = 0;
2247
53.5k
    return(c_out ? c_out : ret);
2248
53.5k
}
2249
2250
/**
2251
 * xmlCharEncInput:
2252
 * @input: a parser input buffer
2253
 * @flush: try to flush all the raw buffer
2254
 *
2255
 * Generic front-end for the encoding handler on parser input
2256
 *
2257
 * Returns the number of byte written if success, or
2258
 *     -1 general error
2259
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2260
 *        the result of transformation can't fit into the encoding we want), or
2261
 */
2262
int
2263
xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
2264
2.75M
{
2265
2.75M
    int ret;
2266
2.75M
    size_t written;
2267
2.75M
    size_t toconv;
2268
2.75M
    int c_in;
2269
2.75M
    int c_out;
2270
2.75M
    xmlBufPtr in;
2271
2.75M
    xmlBufPtr out;
2272
2273
2.75M
    if ((input == NULL) || (input->encoder == NULL) ||
2274
2.75M
        (input->buffer == NULL) || (input->raw == NULL))
2275
0
        return (-1);
2276
2.75M
    out = input->buffer;
2277
2.75M
    in = input->raw;
2278
2279
2.75M
    toconv = xmlBufUse(in);
2280
2.75M
    if (toconv == 0)
2281
366k
        return (0);
2282
2.38M
    if ((toconv > 64 * 1024) && (flush == 0))
2283
0
        toconv = 64 * 1024;
2284
2.38M
    written = xmlBufAvail(out);
2285
2.38M
    if (toconv * 2 >= written) {
2286
19.5k
        xmlBufGrow(out, toconv * 2);
2287
19.5k
        written = xmlBufAvail(out);
2288
19.5k
    }
2289
2.38M
    if ((written > 128 * 1024) && (flush == 0))
2290
0
        written = 128 * 1024;
2291
2292
2.38M
    c_in = toconv;
2293
2.38M
    c_out = written;
2294
2.38M
    ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2295
2.38M
                           xmlBufContent(in), &c_in, flush);
2296
2.38M
    xmlBufShrink(in, c_in);
2297
2.38M
    xmlBufAddLen(out, c_out);
2298
2.38M
    if (ret == -1)
2299
30.6k
        ret = -3;
2300
2301
2.38M
    switch (ret) {
2302
2.33M
        case 0:
2303
#ifdef DEBUG_ENCODING
2304
            xmlGenericError(xmlGenericErrorContext,
2305
                            "converted %d bytes to %d bytes of input\n",
2306
                            c_in, c_out);
2307
#endif
2308
2.33M
            break;
2309
0
        case -1:
2310
#ifdef DEBUG_ENCODING
2311
            xmlGenericError(xmlGenericErrorContext,
2312
                         "converted %d bytes to %d bytes of input, %d left\n",
2313
                            c_in, c_out, (int)xmlBufUse(in));
2314
#endif
2315
0
            break;
2316
33.3k
        case -3:
2317
#ifdef DEBUG_ENCODING
2318
            xmlGenericError(xmlGenericErrorContext,
2319
                        "converted %d bytes to %d bytes of input, %d left\n",
2320
                            c_in, c_out, (int)xmlBufUse(in));
2321
#endif
2322
33.3k
            break;
2323
15.4k
        case -2: {
2324
15.4k
            char buf[50];
2325
15.4k
            const xmlChar *content = xmlBufContent(in);
2326
2327
15.4k
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2328
15.4k
         content[0], content[1],
2329
15.4k
         content[2], content[3]);
2330
15.4k
      buf[49] = 0;
2331
15.4k
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2332
15.4k
        "input conversion failed due to input error, bytes %s\n",
2333
15.4k
               buf);
2334
15.4k
        }
2335
2.38M
    }
2336
    /*
2337
     * Ignore when input buffer is not on a boundary
2338
     */
2339
2.38M
    if (ret == -3)
2340
33.3k
        ret = 0;
2341
2.38M
    return (c_out? c_out : ret);
2342
2.38M
}
2343
2344
/**
2345
 * xmlCharEncInFunc:
2346
 * @handler:  char encoding transformation data structure
2347
 * @out:  an xmlBuffer for the output.
2348
 * @in:  an xmlBuffer for the input
2349
 *
2350
 * Generic front-end for the encoding handler input function
2351
 *
2352
 * Returns the number of byte written if success, or
2353
 *     -1 general error
2354
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2355
 *        the result of transformation can't fit into the encoding we want), or
2356
 */
2357
int
2358
xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2359
                 xmlBufferPtr in)
2360
0
{
2361
0
    int ret;
2362
0
    int written;
2363
0
    int toconv;
2364
2365
0
    if (handler == NULL)
2366
0
        return (-1);
2367
0
    if (out == NULL)
2368
0
        return (-1);
2369
0
    if (in == NULL)
2370
0
        return (-1);
2371
2372
0
    toconv = in->use;
2373
0
    if (toconv == 0)
2374
0
        return (0);
2375
0
    written = out->size - out->use -1; /* count '\0' */
2376
0
    if (toconv * 2 >= written) {
2377
0
        xmlBufferGrow(out, out->size + toconv * 2);
2378
0
        written = out->size - out->use - 1;
2379
0
    }
2380
0
    ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2381
0
                           in->content, &toconv, 1);
2382
0
    xmlBufferShrink(in, toconv);
2383
0
    out->use += written;
2384
0
    out->content[out->use] = 0;
2385
0
    if (ret == -1)
2386
0
        ret = -3;
2387
2388
0
    switch (ret) {
2389
0
        case 0:
2390
#ifdef DEBUG_ENCODING
2391
            xmlGenericError(xmlGenericErrorContext,
2392
                            "converted %d bytes to %d bytes of input\n",
2393
                            toconv, written);
2394
#endif
2395
0
            break;
2396
0
        case -1:
2397
#ifdef DEBUG_ENCODING
2398
            xmlGenericError(xmlGenericErrorContext,
2399
                         "converted %d bytes to %d bytes of input, %d left\n",
2400
                            toconv, written, in->use);
2401
#endif
2402
0
            break;
2403
0
        case -3:
2404
#ifdef DEBUG_ENCODING
2405
            xmlGenericError(xmlGenericErrorContext,
2406
                        "converted %d bytes to %d bytes of input, %d left\n",
2407
                            toconv, written, in->use);
2408
#endif
2409
0
            break;
2410
0
        case -2: {
2411
0
            char buf[50];
2412
2413
0
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2414
0
         in->content[0], in->content[1],
2415
0
         in->content[2], in->content[3]);
2416
0
      buf[49] = 0;
2417
0
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2418
0
        "input conversion failed due to input error, bytes %s\n",
2419
0
               buf);
2420
0
        }
2421
0
    }
2422
    /*
2423
     * Ignore when input buffer is not on a boundary
2424
     */
2425
0
    if (ret == -3)
2426
0
        ret = 0;
2427
0
    return (written? written : ret);
2428
0
}
2429
2430
#ifdef LIBXML_OUTPUT_ENABLED
2431
/**
2432
 * xmlCharEncOutput:
2433
 * @output: a parser output buffer
2434
 * @init: is this an initialization call without data
2435
 *
2436
 * Generic front-end for the encoding handler on parser output
2437
 * a first call with @init == 1 has to be made first to initiate the
2438
 * output in case of non-stateless encoding needing to initiate their
2439
 * state or the output (like the BOM in UTF16).
2440
 * In case of UTF8 sequence conversion errors for the given encoder,
2441
 * the content will be automatically remapped to a CharRef sequence.
2442
 *
2443
 * Returns the number of byte written if success, or
2444
 *     -1 general error
2445
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2446
 *        the result of transformation can't fit into the encoding we want), or
2447
 */
2448
int
2449
xmlCharEncOutput(xmlOutputBufferPtr output, int init)
2450
110k
{
2451
110k
    int ret;
2452
110k
    size_t written;
2453
110k
    int writtentot = 0;
2454
110k
    size_t toconv;
2455
110k
    int c_in;
2456
110k
    int c_out;
2457
110k
    xmlBufPtr in;
2458
110k
    xmlBufPtr out;
2459
2460
110k
    if ((output == NULL) || (output->encoder == NULL) ||
2461
110k
        (output->buffer == NULL) || (output->conv == NULL))
2462
0
        return (-1);
2463
110k
    out = output->conv;
2464
110k
    in = output->buffer;
2465
2466
111k
retry:
2467
2468
111k
    written = xmlBufAvail(out);
2469
2470
    /*
2471
     * First specific handling of the initialization call
2472
     */
2473
111k
    if (init) {
2474
13.5k
        c_in = 0;
2475
13.5k
        c_out = written;
2476
        /* TODO: Check return value. */
2477
13.5k
        xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2478
13.5k
                          NULL, &c_in);
2479
13.5k
        xmlBufAddLen(out, c_out);
2480
#ifdef DEBUG_ENCODING
2481
  xmlGenericError(xmlGenericErrorContext,
2482
    "initialized encoder\n");
2483
#endif
2484
13.5k
        return(c_out);
2485
13.5k
    }
2486
2487
    /*
2488
     * Conversion itself.
2489
     */
2490
98.0k
    toconv = xmlBufUse(in);
2491
98.0k
    if (toconv == 0)
2492
13.0k
        return (writtentot);
2493
84.9k
    if (toconv > 64 * 1024)
2494
0
        toconv = 64 * 1024;
2495
84.9k
    if (toconv * 4 >= written) {
2496
16.6k
        xmlBufGrow(out, toconv * 4);
2497
16.6k
        written = xmlBufAvail(out);
2498
16.6k
    }
2499
84.9k
    if (written > 256 * 1024)
2500
386
        written = 256 * 1024;
2501
2502
84.9k
    c_in = toconv;
2503
84.9k
    c_out = written;
2504
84.9k
    ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2505
84.9k
                            xmlBufContent(in), &c_in);
2506
84.9k
    xmlBufShrink(in, c_in);
2507
84.9k
    xmlBufAddLen(out, c_out);
2508
84.9k
    writtentot += c_out;
2509
84.9k
    if (ret == -1) {
2510
0
        if (c_out > 0) {
2511
            /* Can be a limitation of iconv or uconv */
2512
0
            goto retry;
2513
0
        }
2514
0
        ret = -3;
2515
0
    }
2516
2517
    /*
2518
     * Attempt to handle error cases
2519
     */
2520
84.9k
    switch (ret) {
2521
82.1k
        case 0:
2522
#ifdef DEBUG_ENCODING
2523
      xmlGenericError(xmlGenericErrorContext,
2524
        "converted %d bytes to %d bytes of output\n",
2525
              c_in, c_out);
2526
#endif
2527
82.1k
      break;
2528
0
        case -1:
2529
#ifdef DEBUG_ENCODING
2530
      xmlGenericError(xmlGenericErrorContext,
2531
        "output conversion failed by lack of space\n");
2532
#endif
2533
0
      break;
2534
256
        case -3:
2535
#ifdef DEBUG_ENCODING
2536
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2537
              c_in, c_out, (int) xmlBufUse(in));
2538
#endif
2539
256
      break;
2540
0
        case -4:
2541
0
            xmlEncodingErr(XML_I18N_NO_OUTPUT,
2542
0
                           "xmlCharEncOutFunc: no output function !\n", NULL);
2543
0
            ret = -1;
2544
0
            break;
2545
2.52k
        case -2: {
2546
2.52k
      xmlChar charref[20];
2547
2.52k
      int len = xmlBufUse(in);
2548
2.52k
            xmlChar *content = xmlBufContent(in);
2549
2.52k
      int cur, charrefLen;
2550
2551
2.52k
      cur = xmlGetUTF8Char(content, &len);
2552
2.52k
      if (cur <= 0)
2553
1.07k
                break;
2554
2555
#ifdef DEBUG_ENCODING
2556
            xmlGenericError(xmlGenericErrorContext,
2557
                    "handling output conversion error\n");
2558
            xmlGenericError(xmlGenericErrorContext,
2559
                    "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2560
                    content[0], content[1],
2561
                    content[2], content[3]);
2562
#endif
2563
            /*
2564
             * Removes the UTF8 sequence, and replace it by a charref
2565
             * and continue the transcoding phase, hoping the error
2566
             * did not mangle the encoder state.
2567
             */
2568
1.44k
            charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2569
1.44k
                             "&#%d;", cur);
2570
1.44k
            xmlBufShrink(in, len);
2571
1.44k
            xmlBufGrow(out, charrefLen * 4);
2572
1.44k
            c_out = xmlBufAvail(out);
2573
1.44k
            c_in = charrefLen;
2574
1.44k
            ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2575
1.44k
                                    charref, &c_in);
2576
2577
1.44k
      if ((ret < 0) || (c_in != charrefLen)) {
2578
0
    char buf[50];
2579
2580
0
    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2581
0
       content[0], content[1],
2582
0
       content[2], content[3]);
2583
0
    buf[49] = 0;
2584
0
    xmlEncodingErr(XML_I18N_CONV_FAILED,
2585
0
        "output conversion failed due to conv error, bytes %s\n",
2586
0
             buf);
2587
0
    if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE)
2588
0
        content[0] = ' ';
2589
0
                break;
2590
0
      }
2591
2592
1.44k
            xmlBufAddLen(out, c_out);
2593
1.44k
            writtentot += c_out;
2594
1.44k
            goto retry;
2595
1.44k
  }
2596
84.9k
    }
2597
83.5k
    return(writtentot ? writtentot : ret);
2598
84.9k
}
2599
#endif
2600
2601
/**
2602
 * xmlCharEncOutFunc:
2603
 * @handler:  char encoding transformation data structure
2604
 * @out:  an xmlBuffer for the output.
2605
 * @in:  an xmlBuffer for the input
2606
 *
2607
 * Generic front-end for the encoding handler output function
2608
 * a first call with @in == NULL has to be made firs to initiate the
2609
 * output in case of non-stateless encoding needing to initiate their
2610
 * state or the output (like the BOM in UTF16).
2611
 * In case of UTF8 sequence conversion errors for the given encoder,
2612
 * the content will be automatically remapped to a CharRef sequence.
2613
 *
2614
 * Returns the number of byte written if success, or
2615
 *     -1 general error
2616
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2617
 *        the result of transformation can't fit into the encoding we want), or
2618
 */
2619
int
2620
xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2621
0
                  xmlBufferPtr in) {
2622
0
    int ret;
2623
0
    int written;
2624
0
    int writtentot = 0;
2625
0
    int toconv;
2626
2627
0
    if (handler == NULL) return(-1);
2628
0
    if (out == NULL) return(-1);
2629
2630
0
retry:
2631
2632
0
    written = out->size - out->use;
2633
2634
0
    if (written > 0)
2635
0
  written--; /* Gennady: count '/0' */
2636
2637
    /*
2638
     * First specific handling of in = NULL, i.e. the initialization call
2639
     */
2640
0
    if (in == NULL) {
2641
0
        toconv = 0;
2642
        /* TODO: Check return value. */
2643
0
        xmlEncOutputChunk(handler, &out->content[out->use], &written,
2644
0
                          NULL, &toconv);
2645
0
        out->use += written;
2646
0
        out->content[out->use] = 0;
2647
#ifdef DEBUG_ENCODING
2648
  xmlGenericError(xmlGenericErrorContext,
2649
    "initialized encoder\n");
2650
#endif
2651
0
        return(0);
2652
0
    }
2653
2654
    /*
2655
     * Conversion itself.
2656
     */
2657
0
    toconv = in->use;
2658
0
    if (toconv == 0)
2659
0
  return(0);
2660
0
    if (toconv * 4 >= written) {
2661
0
        xmlBufferGrow(out, toconv * 4);
2662
0
  written = out->size - out->use - 1;
2663
0
    }
2664
0
    ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2665
0
                            in->content, &toconv);
2666
0
    xmlBufferShrink(in, toconv);
2667
0
    out->use += written;
2668
0
    writtentot += written;
2669
0
    out->content[out->use] = 0;
2670
0
    if (ret == -1) {
2671
0
        if (written > 0) {
2672
            /* Can be a limitation of iconv or uconv */
2673
0
            goto retry;
2674
0
        }
2675
0
        ret = -3;
2676
0
    }
2677
2678
    /*
2679
     * Attempt to handle error cases
2680
     */
2681
0
    switch (ret) {
2682
0
        case 0:
2683
#ifdef DEBUG_ENCODING
2684
      xmlGenericError(xmlGenericErrorContext,
2685
        "converted %d bytes to %d bytes of output\n",
2686
              toconv, written);
2687
#endif
2688
0
      break;
2689
0
        case -1:
2690
#ifdef DEBUG_ENCODING
2691
      xmlGenericError(xmlGenericErrorContext,
2692
        "output conversion failed by lack of space\n");
2693
#endif
2694
0
      break;
2695
0
        case -3:
2696
#ifdef DEBUG_ENCODING
2697
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2698
              toconv, written, in->use);
2699
#endif
2700
0
      break;
2701
0
        case -4:
2702
0
      xmlEncodingErr(XML_I18N_NO_OUTPUT,
2703
0
               "xmlCharEncOutFunc: no output function !\n", NULL);
2704
0
      ret = -1;
2705
0
            break;
2706
0
        case -2: {
2707
0
      xmlChar charref[20];
2708
0
      int len = in->use;
2709
0
      const xmlChar *utf = (const xmlChar *) in->content;
2710
0
      int cur, charrefLen;
2711
2712
0
      cur = xmlGetUTF8Char(utf, &len);
2713
0
      if (cur <= 0)
2714
0
                break;
2715
2716
#ifdef DEBUG_ENCODING
2717
            xmlGenericError(xmlGenericErrorContext,
2718
                    "handling output conversion error\n");
2719
            xmlGenericError(xmlGenericErrorContext,
2720
                    "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2721
                    in->content[0], in->content[1],
2722
                    in->content[2], in->content[3]);
2723
#endif
2724
            /*
2725
             * Removes the UTF8 sequence, and replace it by a charref
2726
             * and continue the transcoding phase, hoping the error
2727
             * did not mangle the encoder state.
2728
             */
2729
0
            charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2730
0
                             "&#%d;", cur);
2731
0
            xmlBufferShrink(in, len);
2732
0
            xmlBufferGrow(out, charrefLen * 4);
2733
0
      written = out->size - out->use - 1;
2734
0
            toconv = charrefLen;
2735
0
            ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2736
0
                                    charref, &toconv);
2737
2738
0
      if ((ret < 0) || (toconv != charrefLen)) {
2739
0
    char buf[50];
2740
2741
0
    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2742
0
       in->content[0], in->content[1],
2743
0
       in->content[2], in->content[3]);
2744
0
    buf[49] = 0;
2745
0
    xmlEncodingErr(XML_I18N_CONV_FAILED,
2746
0
        "output conversion failed due to conv error, bytes %s\n",
2747
0
             buf);
2748
0
    if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2749
0
        in->content[0] = ' ';
2750
0
          break;
2751
0
      }
2752
2753
0
            out->use += written;
2754
0
            writtentot += written;
2755
0
            out->content[out->use] = 0;
2756
0
            goto retry;
2757
0
  }
2758
0
    }
2759
0
    return(writtentot ? writtentot : ret);
2760
0
}
2761
2762
/**
2763
 * xmlCharEncCloseFunc:
2764
 * @handler:  char encoding transformation data structure
2765
 *
2766
 * Generic front-end for encoding handler close function
2767
 *
2768
 * Returns 0 if success, or -1 in case of error
2769
 */
2770
int
2771
77.8k
xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2772
77.8k
    int ret = 0;
2773
77.8k
    int tofree = 0;
2774
77.8k
    int i, handler_in_list = 0;
2775
2776
    /* Avoid unused variable warning if features are disabled. */
2777
77.8k
    (void) handler_in_list;
2778
2779
77.8k
    if (handler == NULL) return(-1);
2780
77.8k
    if (handler->name == NULL) return(-1);
2781
77.8k
    if (handlers != NULL) {
2782
422k
        for (i = 0;i < nbCharEncodingHandler; i++) {
2783
401k
            if (handler == handlers[i]) {
2784
56.5k
          handler_in_list = 1;
2785
56.5k
    break;
2786
56.5k
      }
2787
401k
  }
2788
77.8k
    }
2789
77.8k
#ifdef LIBXML_ICONV_ENABLED
2790
    /*
2791
     * Iconv handlers can be used only once, free the whole block.
2792
     * and the associated icon resources.
2793
     */
2794
77.8k
    if ((handler_in_list == 0) &&
2795
77.8k
        ((handler->iconv_out != NULL) || (handler->iconv_in != NULL))) {
2796
21.3k
        tofree = 1;
2797
21.3k
  if (handler->iconv_out != NULL) {
2798
21.3k
      if (iconv_close(handler->iconv_out))
2799
0
    ret = -1;
2800
21.3k
      handler->iconv_out = NULL;
2801
21.3k
  }
2802
21.3k
  if (handler->iconv_in != NULL) {
2803
21.3k
      if (iconv_close(handler->iconv_in))
2804
0
    ret = -1;
2805
21.3k
      handler->iconv_in = NULL;
2806
21.3k
  }
2807
21.3k
    }
2808
77.8k
#endif /* LIBXML_ICONV_ENABLED */
2809
#ifdef LIBXML_ICU_ENABLED
2810
    if ((handler_in_list == 0) &&
2811
        ((handler->uconv_out != NULL) || (handler->uconv_in != NULL))) {
2812
        tofree = 1;
2813
  if (handler->uconv_out != NULL) {
2814
      closeIcuConverter(handler->uconv_out);
2815
      handler->uconv_out = NULL;
2816
  }
2817
  if (handler->uconv_in != NULL) {
2818
      closeIcuConverter(handler->uconv_in);
2819
      handler->uconv_in = NULL;
2820
  }
2821
    }
2822
#endif
2823
77.8k
    if (tofree) {
2824
        /* free up only dynamic handlers iconv/uconv */
2825
21.3k
        if (handler->name != NULL)
2826
21.3k
            xmlFree(handler->name);
2827
21.3k
        handler->name = NULL;
2828
21.3k
        xmlFree(handler);
2829
21.3k
    }
2830
#ifdef DEBUG_ENCODING
2831
    if (ret)
2832
        xmlGenericError(xmlGenericErrorContext,
2833
    "failed to close the encoding handler\n");
2834
    else
2835
        xmlGenericError(xmlGenericErrorContext,
2836
    "closed the encoding handler\n");
2837
#endif
2838
2839
77.8k
    return(ret);
2840
77.8k
}
2841
2842
/**
2843
 * xmlByteConsumed:
2844
 * @ctxt: an XML parser context
2845
 *
2846
 * This function provides the current index of the parser relative
2847
 * to the start of the current entity. This function is computed in
2848
 * bytes from the beginning starting at zero and finishing at the
2849
 * size in byte of the file if parsing a file. The function is
2850
 * of constant cost if the input is UTF-8 but can be costly if run
2851
 * on non-UTF-8 input.
2852
 *
2853
 * Returns the index in bytes from the beginning of the entity or -1
2854
 *         in case the index could not be computed.
2855
 */
2856
long
2857
0
xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2858
0
    xmlParserInputPtr in;
2859
2860
0
    if (ctxt == NULL) return(-1);
2861
0
    in = ctxt->input;
2862
0
    if (in == NULL)  return(-1);
2863
0
    if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2864
0
        unsigned int unused = 0;
2865
0
  xmlCharEncodingHandler * handler = in->buf->encoder;
2866
        /*
2867
   * Encoding conversion, compute the number of unused original
2868
   * bytes from the input not consumed and subtract that from
2869
   * the raw consumed value, this is not a cheap operation
2870
   */
2871
0
        if (in->end - in->cur > 0) {
2872
0
      unsigned char convbuf[32000];
2873
0
      const unsigned char *cur = (const unsigned char *)in->cur;
2874
0
      int toconv = in->end - in->cur, written = 32000;
2875
2876
0
      int ret;
2877
2878
0
            do {
2879
0
                toconv = in->end - cur;
2880
0
                written = 32000;
2881
0
                ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2882
0
                                        cur, &toconv);
2883
0
                if (ret < 0) {
2884
0
                    if (written > 0)
2885
0
                        ret = -2;
2886
0
                    else
2887
0
                        return(-1);
2888
0
                }
2889
0
                unused += written;
2890
0
                cur += toconv;
2891
0
            } while (ret == -2);
2892
0
  }
2893
0
  if (in->buf->rawconsumed < unused)
2894
0
      return(-1);
2895
0
  return(in->buf->rawconsumed - unused);
2896
0
    }
2897
0
    return(in->consumed + (in->cur - in->base));
2898
0
}
2899
2900
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2901
#ifdef LIBXML_ISO8859X_ENABLED
2902
2903
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. Conversion stops when the input is exhausted or
 * when @out is full, whichever comes first; at most *@outlen bytes are
 * ever written.
 *
 * @xlattable layout as used below: bytes 0..31 are indexed by the low
 * 5 bits of a 2-byte lead, bytes 32..47 by the low 4 bits of a 3-byte
 * lead, and 64-byte blocks follow from offset 48, indexed by the low
 * 6 bits of a continuation byte. A final value of 0 means "not in the
 * character set".
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 * fails (invalid sequence or character not in the target set), -3 if
 * the input ends in the middle of a multi-byte sequence, or -1 if an
 * argument is NULL. A call with @in == NULL is an initialization call
 * and returns 0.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              const unsigned char* const xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;
    int ret = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
        * initialization nothing to do
        */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    outend = out + (*outlen);
    inend = in + (*inlen);
    /* Every iteration emits exactly one byte, so one check is enough. */
    while ((in < inend) && (out < outend)) {
        unsigned char d = *in++;
        if  (d < 0x80)  {
            *out++ = d;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            goto done;
        } else if (d < 0xE0) {
            unsigned char c;
            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                ret = -3;
                goto done;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                ret = -2;
                goto done;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                goto done;
            }
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;
            if (!(in < inend - 1)) {
                /* trailing bytes not in input buffer */
                ret = -3;
                goto done;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                ret = -2;
                goto done;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                ret = -2;
                goto done;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                        xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                goto done;
            }
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            ret = -2;
            goto done;
        }
        processed = in;
    }

done:
    /* Only fully converted characters are reported as consumed. */
    *outlen = out - outstart;
    *inlen = processed - instart;
    return((ret < 0) ? ret : *outlen);
}
3021
3022
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable:  table giving the code point of each byte 0x80..0xFF,
 *                 indexed by (byte - 0x80); 0 marks an undefined byte
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. Conversion stops when the input is exhausted or
 * when there is no longer room in @out for the next character.
 *
 * Returns the number of bytes written if success, or -1 if an argument
 * is NULL or an undefined input byte is met.
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* instop;
    unsigned int c;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    instop = inend;

    /*
     * A high byte needs up to 3 output bytes. Comparing the remaining
     * room avoids forming a pointer before the start of @out.
     */
    while ((in < inend) && (outend - out > 2)) {
        if (*in >= 0x80) {
            c = unicodetable [*in - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
            ++in;
        }
        /* Never copy more ASCII bytes than @out can still hold. */
        if (instop - in > outend - out) instop = in + (outend - out);
        /* Check the bound before dereferencing to avoid an over-read. */
        while ((in < instop) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }
    /* Up to two ASCII bytes still fit in the last two output slots. */
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ =  *in++;
    }
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ =  *in++;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (*outlen);
}
3087
3088
3089
/************************************************************************
3090
 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
3091
 ************************************************************************/
3092
3093
/*
 * ISO-8859-2 to Unicode: one code point per byte 0x80..0xFF, indexed by
 * (byte - 0x80), the form of lookup ISO8859xToUTF8 performs on its table
 * argument.
 */
static const unsigned short xmlunicodetable_ISO8859_2[128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
};
3111
3112
/*
 * Unicode to ISO-8859-2, in the layout UTF8ToISO8859x indexes: a 48-byte
 * header (32 entries for 2-byte lead bytes, 16 for 3-byte lead bytes)
 * holding block numbers, followed by 6 blocks of 64 bytes indexed by the
 * low 6 bits of a continuation byte. A final value of 0 means the
 * character is not in the set. The initializer fills the array exactly,
 * so no terminating NUL is stored.
 */
static const unsigned char xmltranscodetable_ISO8859_2[48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
};
3141
3142
/*
 * ISO-8859-3 to Unicode: one code point per byte 0x80..0xFF, indexed by
 * (byte - 0x80). Entries equal to 0x0000 are bytes with no mapping.
 */
static const unsigned short xmlunicodetable_ISO8859_3[128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
};
3160
3161
/*
 * Unicode to ISO-8859-3, in the layout UTF8ToISO8859x indexes: a 48-byte
 * header (32 entries for 2-byte lead bytes, 16 for 3-byte lead bytes)
 * holding block numbers, followed by 7 blocks of 64 bytes indexed by the
 * low 6 bits of a continuation byte. The initializer fills the array
 * exactly, so no terminating NUL is stored.
 */
static const unsigned char xmltranscodetable_ISO8859_3[48 + 7 * 64] = {
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
};
3194
3195
/*
 * ISO-8859-4 to Unicode: one code point per byte 0x80..0xFF, indexed by
 * (byte - 0x80).
 */
static const unsigned short xmlunicodetable_ISO8859_4[128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
};
3213
3214
/*
 * Unicode to ISO-8859-4, in the layout UTF8ToISO8859x indexes: a 48-byte
 * header (32 entries for 2-byte lead bytes, 16 for 3-byte lead bytes)
 * holding block numbers, followed by 6 blocks of 64 bytes indexed by the
 * low 6 bits of a continuation byte. A final value of 0 means the
 * character is not in the set. The initializer fills the array exactly,
 * so no terminating NUL is stored.
 */
static const unsigned char xmltranscodetable_ISO8859_4[48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
};
3243
3244
/*
 * ISO-8859-5 to Unicode: one code point per byte 0x80..0xFF, indexed by
 * (byte - 0x80).
 */
static const unsigned short xmlunicodetable_ISO8859_5[128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
    0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
    0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
    0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
    0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
    0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
    0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
    0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
};
3262
3263
/*
 * Unicode to ISO-8859-5, in the layout UTF8ToISO8859x indexes: a 48-byte
 * header (32 entries for 2-byte lead bytes, 16 for 3-byte lead bytes)
 * holding block numbers, followed by 6 blocks of 64 bytes indexed by the
 * low 6 bits of a continuation byte. For 3-byte sequences the first
 * block lookup yields another block number. A final value of 0 means
 * the character is not in the set. The initializer fills the array
 * exactly, so no terminating NUL is stored.
 */
static const unsigned char xmltranscodetable_ISO8859_5[48 + 6 * 64] = {
    "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3292
3293
static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
3294
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3295
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3296
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3297
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3298
    0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
3299
    0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
3300
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3301
    0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
3302
    0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
3303
    0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3304
    0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3305
    0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3306
    0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3307
    0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3308
    0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3309
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3310
};
3311
3312
static const unsigned char xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
3313
    "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3314
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3315
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3316
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3317
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3318
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3319
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3320
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3321
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3322
    "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3323
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3324
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3325
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3326
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3327
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3328
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3329
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3330
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3331
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3332
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3333
    "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3334
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3335
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3336
};
3337
3338
static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
3339
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3340
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3341
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3342
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3343
    0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3344
    0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3345
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3346
    0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3347
    0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3348
    0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3349
    0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3350
    0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3351
    0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3352
    0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3353
    0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3354
    0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3355
};
3356
3357
static const unsigned char xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3358
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3359
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3360
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3361
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3362
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3363
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3364
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3365
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3366
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3367
    "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3368
    "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3369
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3370
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3371
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3372
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3373
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3374
    "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3375
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3376
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3377
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3378
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3379
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3380
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3381
    "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3382
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3383
    "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3384
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3385
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3386
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3387
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3388
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3389
};
3390
3391
static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
3392
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3393
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3394
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3395
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3396
    0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3397
    0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3398
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3399
    0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3400
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3401
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3402
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3403
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3404
    0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3405
    0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3406
    0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3407
    0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3408
};
3409
3410
static const unsigned char xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3411
    "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3412
    "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3413
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3414
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3415
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3416
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3417
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3418
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3419
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3420
    "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3421
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3422
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3423
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3424
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3425
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3426
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3427
    "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3428
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3429
    "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3430
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3431
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3432
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3433
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3434
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3435
    "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3436
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3437
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3438
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3439
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3440
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3441
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3442
};
3443
3444
static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
3445
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3446
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3447
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3448
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3449
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3450
    0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3451
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3452
    0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3453
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3454
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3455
    0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3456
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3457
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3458
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3459
    0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3460
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3461
};
3462
3463
static const unsigned char xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3464
    "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3465
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3466
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3467
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3468
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3469
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3470
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3471
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3472
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3473
    "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3474
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3475
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3476
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3477
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3478
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3479
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3480
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3481
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3482
    "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3483
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3484
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3485
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3486
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3487
};
3488
3489
static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3490
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3491
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3492
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3493
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3494
    0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3495
    0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3496
    0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3497
    0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3498
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3499
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3500
    0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3501
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3502
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3503
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3504
    0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3505
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3506
};
3507
3508
static const unsigned char xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3509
    "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3510
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3511
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3512
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3513
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3514
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3515
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3516
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3517
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3518
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3519
    "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3520
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3521
    "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3522
    "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3523
    "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3524
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3525
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3526
    "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3527
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3528
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3529
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3530
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3531
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3532
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3533
    "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3534
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3535
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3536
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3537
    "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3538
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3539
    "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3540
};
3541
3542
static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3543
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3544
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3545
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3546
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3547
    0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3548
    0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3549
    0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3550
    0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3551
    0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3552
    0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3553
    0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3554
    0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3555
    0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3556
    0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3557
    0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3558
    0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3559
};
3560
3561
static const unsigned char xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3562
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3563
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3564
    "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3565
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3566
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3567
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3568
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3569
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3570
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3571
    "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3572
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3573
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3574
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3575
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3576
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3577
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3578
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3579
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3580
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3581
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3582
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3583
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3584
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3585
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3586
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3587
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3588
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3589
};
3590
3591
static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3592
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3593
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3594
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3595
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3596
    0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3597
    0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3598
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3599
    0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3600
    0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3601
    0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3602
    0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3603
    0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3604
    0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3605
    0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3606
    0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3607
    0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3608
};
3609
3610
static const unsigned char xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3611
    "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3612
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3613
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3614
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3615
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3616
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3617
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3618
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3619
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3620
    "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3621
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3622
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3623
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3624
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3625
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3626
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3627
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3628
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3629
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3630
    "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3631
    "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3632
    "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3633
    "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3634
    "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3635
    "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3636
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3637
    "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3638
    "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3639
    "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3640
    "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3641
    "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3642
};
3643
3644
static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
3645
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3646
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3647
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3648
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3649
    0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3650
    0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3651
    0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3652
    0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3653
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3654
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3655
    0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3656
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3657
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3658
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3659
    0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3660
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
3661
};
3662
3663
static const unsigned char xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
3664
    "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3665
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3666
    "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3667
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3668
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3669
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3670
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3671
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3672
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3673
    "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3674
    "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3675
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3676
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3677
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3678
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3679
    "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3680
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3681
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3682
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3683
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3684
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3685
    "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3686
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3687
    "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3688
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3689
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3690
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3691
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3692
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3693
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3694
    "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3695
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3696
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3697
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3698
    "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3699
    "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3700
    "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3701
    "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3702
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3703
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3704
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3705
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3706
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3707
};
3708
3709
static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
3710
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3711
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3712
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3713
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3714
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3715
    0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3716
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3717
    0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3718
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3719
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3720
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3721
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3722
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3723
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3724
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3725
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
3726
};
3727
3728
static const unsigned char xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
3729
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3730
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3731
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3732
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3733
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3734
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3735
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3736
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3737
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3738
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3739
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3740
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3741
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3742
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3743
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3744
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3745
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3746
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3747
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3748
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3749
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3750
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3751
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3752
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3753
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3754
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3755
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
3756
};
3757
3758
/*
 * Lookup table handed to ISO8859xToUTF8 by ISO8859_16ToUTF8.
 *
 * Holds 128 16-bit values.  The first 32 entries are 0x0080..0x009f in
 * order; the remaining 96 mix Latin-1 range values with values above
 * 0x00ff (for example 0x20ac, 0x0218, 0x021a).
 *
 * NOTE(review): presumably entry i is the Unicode code point for the
 * ISO-8859-16 byte 0x80 + i -- confirm against ISO8859xToUTF8.
 */
static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
3759
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3760
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3761
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3762
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3763
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3764
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3765
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3766
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3767
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3768
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3769
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3770
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3771
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3772
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3773
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3774
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
3775
};
3776
3777
/*
 * Lookup table handed to UTF8ToISO8859x by UTF8ToISO8859_16.
 *
 * The array is 48 + 9 * 64 = 624 bytes and is initialised from 39
 * adjacent string literals of 16 bytes each, so the data fills the array
 * exactly and the literal's terminating NUL is not stored.
 *
 * NOTE(review): the declared size suggests a 48-byte leading index
 * followed by nine 64-byte blocks, with the small values (\x01..\x08) in
 * the first rows selecting a block and \x00 meaning "no mapping" --
 * confirm against UTF8ToISO8859x before relying on this layout.
 */
static const unsigned char xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
3778
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3779
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3780
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3781
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3782
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3783
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3784
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3785
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3786
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3787
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3788
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3789
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3790
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3791
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3792
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3793
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3794
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3795
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3796
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3797
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3798
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3799
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3800
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3801
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3802
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3803
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3804
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3805
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3806
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3807
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3808
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3809
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3810
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3811
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3812
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3813
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3814
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3815
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3816
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3817
};
3818
3819
3820
/*
3821
 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3822
 */
3823
3824
static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3825
    const unsigned char* in, int *inlen) {
3826
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3827
}
3828
static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3829
    const unsigned char* in, int *inlen) {
3830
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3831
}
3832
3833
static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3834
    const unsigned char* in, int *inlen) {
3835
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3836
}
3837
static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3838
    const unsigned char* in, int *inlen) {
3839
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3840
}
3841
3842
static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3843
    const unsigned char* in, int *inlen) {
3844
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3845
}
3846
static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3847
    const unsigned char* in, int *inlen) {
3848
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3849
}
3850
3851
static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3852
    const unsigned char* in, int *inlen) {
3853
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3854
}
3855
static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3856
    const unsigned char* in, int *inlen) {
3857
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3858
}
3859
3860
static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3861
    const unsigned char* in, int *inlen) {
3862
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3863
}
3864
static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3865
    const unsigned char* in, int *inlen) {
3866
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3867
}
3868
3869
static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3870
    const unsigned char* in, int *inlen) {
3871
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3872
}
3873
static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3874
    const unsigned char* in, int *inlen) {
3875
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3876
}
3877
3878
static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3879
    const unsigned char* in, int *inlen) {
3880
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3881
}
3882
static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3883
    const unsigned char* in, int *inlen) {
3884
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3885
}
3886
3887
static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3888
    const unsigned char* in, int *inlen) {
3889
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3890
}
3891
static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3892
    const unsigned char* in, int *inlen) {
3893
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3894
}
3895
3896
static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3897
    const unsigned char* in, int *inlen) {
3898
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3899
}
3900
static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3901
    const unsigned char* in, int *inlen) {
3902
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3903
}
3904
3905
static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3906
    const unsigned char* in, int *inlen) {
3907
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3908
}
3909
static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3910
    const unsigned char* in, int *inlen) {
3911
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3912
}
3913
3914
static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3915
    const unsigned char* in, int *inlen) {
3916
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3917
}
3918
static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3919
    const unsigned char* in, int *inlen) {
3920
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3921
}
3922
3923
static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3924
    const unsigned char* in, int *inlen) {
3925
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3926
}
3927
static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3928
    const unsigned char* in, int *inlen) {
3929
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3930
}
3931
3932
static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3933
    const unsigned char* in, int *inlen) {
3934
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3935
}
3936
static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3937
    const unsigned char* in, int *inlen) {
3938
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3939
}
3940
3941
static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3942
    const unsigned char* in, int *inlen) {
3943
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3944
}
3945
static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3946
    const unsigned char* in, int *inlen) {
3947
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3948
}
3949
3950
static void
3951
xmlRegisterCharEncodingHandlersISO8859x (void) {
3952
    xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3953
    xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3954
    xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3955
    xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3956
    xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3957
    xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3958
    xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3959
    xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3960
    xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3961
    xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3962
    xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3963
    xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3964
    xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3965
    xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3966
}
3967
3968
#endif
3969
#endif
3970