Coverage Report

Created: 2023-06-07 06:14

/src/libxml2/encoding.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * encoding.c : implements the encoding conversion functions needed for XML
3
 *
4
 * Related specs:
5
 * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6
 * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7
 * [ISO-10646]    UTF-8 and UTF-16 in Annexes
8
 * [ISO-8859-1]   ISO Latin-1 characters codes.
9
 * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
10
 *                Worldwide Character Encoding -- Version 1.0", Addison-
11
 *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
12
 *                described in Unicode Technical Report #4.
13
 * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
14
 *                Information Interchange, ANSI X3.4-1986.
15
 *
16
 * See Copyright for the status of this software.
17
 *
18
 * daniel@veillard.com
19
 *
20
 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
21
 */
22
23
#define IN_LIBXML
24
#include "libxml.h"
25
26
#include <string.h>
27
#include <limits.h>
28
#include <ctype.h>
29
#include <stdlib.h>
30
31
#ifdef LIBXML_ICONV_ENABLED
32
#include <errno.h>
33
#endif
34
35
#include <libxml/encoding.h>
36
#include <libxml/xmlmemory.h>
37
#ifdef LIBXML_HTML_ENABLED
38
#include <libxml/HTMLparser.h>
39
#endif
40
#include <libxml/globals.h>
41
#include <libxml/xmlerror.h>
42
43
#include "private/buf.h"
44
#include "private/enc.h"
45
#include "private/error.h"
46
47
#ifdef LIBXML_ICU_ENABLED
48
#include <unicode/ucnv.h>
49
/* Size of pivot buffer, same as icu/source/common/ucnv.cpp CHUNK_SIZE */
50
#define ICU_PIVOT_BUF_SIZE 1024
51
typedef struct _uconv_t uconv_t;
52
/*
 * State for one ICU-backed conversion, allocated by openIcuConverter() and
 * released by closeIcuConverter().
 */
struct _uconv_t {
53
  UConverter *uconv; /* for conversion between an encoding and UTF-16 */
54
  UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */
55
  UChar      pivot_buf[ICU_PIVOT_BUF_SIZE]; /* UTF-16 pivot storage, ICU_PIVOT_BUF_SIZE units */
56
  UChar      *pivot_source; /* cursor into pivot_buf, initialised to its start; presumably the read position - TODO confirm against the conversion caller */
57
  UChar      *pivot_target; /* cursor into pivot_buf, initialised to its start; presumably the write position - TODO confirm against the conversion caller */
58
};
59
#endif
60
61
typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
62
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
63
/*
 * One entry of the encoding alias table: a pair of strings.
 * NOTE(review): presumably @alias is the alternate spelling and @name the
 * encoding name it resolves to - confirm against the alias lookup code.
 */
struct _xmlCharEncodingAlias {
64
    const char *name;
65
    const char *alias;
66
};
67
68
/* Alias table storage; starts out empty (NULL with both counters at 0). */
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
69
static int xmlCharEncodingAliasesNb = 0;   /* presumably the number of entries in use - TODO confirm */
70
static int xmlCharEncodingAliasesMax = 0;  /* presumably the allocated capacity - TODO confirm */
71
72
#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
73
#if 0
74
#define DEBUG_ENCODING  /* Define this to get encoding traces */
75
#endif
76
#else
77
#endif
78
79
/*
 * Non-zero when the host stores 16-bit integers least-significant byte
 * first. Initialised to 1.
 * NOTE(review): presumably corrected during library initialisation on
 * big-endian hosts; that code is not visible here - confirm.
 */
static int xmlLittleEndian = 1;
80
81
#ifdef LIBXML_ICU_ENABLED
82
static uconv_t*
83
openIcuConverter(const char* name, int toUnicode)
84
{
85
  UErrorCode status = U_ZERO_ERROR;
86
  uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
87
  if (conv == NULL)
88
    return NULL;
89
90
  conv->pivot_source = conv->pivot_buf;
91
  conv->pivot_target = conv->pivot_buf;
92
93
  conv->uconv = ucnv_open(name, &status);
94
  if (U_FAILURE(status))
95
    goto error;
96
97
  status = U_ZERO_ERROR;
98
  if (toUnicode) {
99
    ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
100
                        NULL, NULL, NULL, &status);
101
  }
102
  else {
103
    ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
104
                        NULL, NULL, NULL, &status);
105
  }
106
  if (U_FAILURE(status))
107
    goto error;
108
109
  status = U_ZERO_ERROR;
110
  conv->utf8 = ucnv_open("UTF-8", &status);
111
  if (U_SUCCESS(status))
112
    return conv;
113
114
error:
115
  if (conv->uconv)
116
    ucnv_close(conv->uconv);
117
  xmlFree(conv);
118
  return NULL;
119
}
120
121
static void
122
closeIcuConverter(uconv_t *conv)
123
{
124
  if (conv != NULL) {
125
    ucnv_close(conv->uconv);
126
    ucnv_close(conv->utf8);
127
    xmlFree(conv);
128
  }
129
}
130
#endif /* LIBXML_ICU_ENABLED */
131
132
/************************************************************************
133
 *                  *
134
 *    Conversions To/From UTF8 encoding     *
135
 *                  *
136
 ************************************************************************/
137
138
/**
139
 * asciiToUTF8:
140
 * @out:  a pointer to an array of bytes to store the result
141
 * @outlen:  the length of @out
142
 * @in:  a pointer to an array of ASCII chars
143
 * @inlen:  the length of @in
144
 *
145
 * Take a block of ASCII chars in and try to convert it to an UTF-8
146
 * block of chars out.
147
 *
148
 * Returns the number of bytes written or an XML_ENC_ERR code.
149
 *
150
 * The value of @inlen after return is the number of octets consumed
151
 *     if the return value is positive, else unpredictable.
152
 * The value of @outlen after return is the number of octets produced.
153
 */
154
static int
155
asciiToUTF8(unsigned char* out, int *outlen,
156
498
              const unsigned char* in, int *inlen) {
157
498
    unsigned char* outstart = out;
158
498
    const unsigned char* base = in;
159
498
    const unsigned char* processed = in;
160
498
    unsigned char* outend = out + *outlen;
161
498
    const unsigned char* inend;
162
498
    unsigned int c;
163
164
498
    inend = in + (*inlen);
165
12.4k
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
166
12.2k
  c= *in++;
167
168
12.2k
        if (out >= outend)
169
0
      break;
170
12.2k
        if (c < 0x80) {
171
11.9k
      *out++ = c;
172
11.9k
  } else {
173
264
      *outlen = out - outstart;
174
264
      *inlen = processed - base;
175
264
      return(XML_ENC_ERR_INPUT);
176
264
  }
177
178
11.9k
  processed = (const unsigned char*) in;
179
11.9k
    }
180
234
    *outlen = out - outstart;
181
234
    *inlen = processed - base;
182
234
    return(*outlen);
183
498
}
184
185
#ifdef LIBXML_OUTPUT_ENABLED
186
/**
187
 * UTF8Toascii:
188
 * @out:  a pointer to an array of bytes to store the result
189
 * @outlen:  the length of @out
190
 * @in:  a pointer to an array of UTF-8 chars
191
 * @inlen:  the length of @in
192
 *
193
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
194
 * block of chars out.
195
 *
196
 * Returns the number of bytes written or an XML_ENC_ERR code.
197
 *
198
 * The value of @inlen after return is the number of octets consumed
199
 *     if the return value is positive, else unpredictable.
200
 * The value of @outlen after return is the number of octets produced.
201
 */
202
static int
203
UTF8Toascii(unsigned char* out, int *outlen,
204
465k
              const unsigned char* in, int *inlen) {
205
465k
    const unsigned char* processed = in;
206
465k
    const unsigned char* outend;
207
465k
    const unsigned char* outstart = out;
208
465k
    const unsigned char* instart = in;
209
465k
    const unsigned char* inend;
210
465k
    unsigned int c, d;
211
465k
    int trailing;
212
213
465k
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
214
0
        return(XML_ENC_ERR_INTERNAL);
215
465k
    if (in == NULL) {
216
        /*
217
   * initialization nothing to do
218
   */
219
51
  *outlen = 0;
220
51
  *inlen = 0;
221
51
  return(0);
222
51
    }
223
465k
    inend = in + (*inlen);
224
465k
    outend = out + (*outlen);
225
2.39M
    while (in < inend) {
226
2.16M
  d = *in++;
227
2.16M
  if      (d < 0x80)  { c= d; trailing= 0; }
228
232k
  else if (d < 0xC0) {
229
      /* trailing byte in leading position */
230
0
      *outlen = out - outstart;
231
0
      *inlen = processed - instart;
232
0
      return(XML_ENC_ERR_INPUT);
233
232k
        } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
234
725
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
235
608
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
236
0
  else {
237
      /* no chance for this in Ascii */
238
0
      *outlen = out - outstart;
239
0
      *inlen = processed - instart;
240
0
      return(XML_ENC_ERR_INPUT);
241
0
  }
242
243
2.16M
  if (inend - in < trailing) {
244
83
      break;
245
83
  }
246
247
2.39M
  for ( ; trailing; trailing--) {
248
234k
      if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
249
0
    break;
250
234k
      c <<= 6;
251
234k
      c |= d & 0x3F;
252
234k
  }
253
254
  /* assertion: c is a single UTF-4 value */
255
2.16M
  if (c < 0x80) {
256
1.92M
      if (out >= outend)
257
0
    break;
258
1.92M
      *out++ = c;
259
1.92M
  } else {
260
      /* no chance for this in Ascii */
261
232k
      *outlen = out - outstart;
262
232k
      *inlen = processed - instart;
263
232k
      return(XML_ENC_ERR_INPUT);
264
232k
  }
265
1.92M
  processed = in;
266
1.92M
    }
267
233k
    *outlen = out - outstart;
268
233k
    *inlen = processed - instart;
269
233k
    return(*outlen);
270
465k
}
271
#endif /* LIBXML_OUTPUT_ENABLED */
272
273
/**
274
 * isolat1ToUTF8:
275
 * @out:  a pointer to an array of bytes to store the result
276
 * @outlen:  the length of @out
277
 * @in:  a pointer to an array of ISO Latin 1 chars
278
 * @inlen:  the length of @in
279
 *
280
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
281
 * block of chars out.
282
 *
283
 * Returns the number of bytes written or an XML_ENC_ERR code.
284
 *
285
 * The value of @inlen after return is the number of octets consumed
286
 *     if the return value is positive, else unpredictable.
287
 * The value of @outlen after return is the number of octets produced.
288
 */
289
int
290
isolat1ToUTF8(unsigned char* out, int *outlen,
291
20.2k
              const unsigned char* in, int *inlen) {
292
20.2k
    unsigned char* outstart = out;
293
20.2k
    const unsigned char* base = in;
294
20.2k
    unsigned char* outend;
295
20.2k
    const unsigned char* inend;
296
20.2k
    const unsigned char* instop;
297
298
20.2k
    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
299
0
  return(XML_ENC_ERR_INTERNAL);
300
301
20.2k
    outend = out + *outlen;
302
20.2k
    inend = in + (*inlen);
303
20.2k
    instop = inend;
304
305
26.6M
    while ((in < inend) && (out < outend - 1)) {
306
26.6M
  if (*in >= 0x80) {
307
26.6M
      *out++ = (((*in) >>  6) & 0x1F) | 0xC0;
308
26.6M
            *out++ = ((*in) & 0x3F) | 0x80;
309
26.6M
      ++in;
310
26.6M
  }
311
26.6M
  if ((instop - in) > (outend - out)) instop = in + (outend - out);
312
140M
  while ((in < instop) && (*in < 0x80)) {
313
113M
      *out++ = *in++;
314
113M
  }
315
26.6M
    }
316
20.2k
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
317
0
        *out++ = *in++;
318
0
    }
319
20.2k
    *outlen = out - outstart;
320
20.2k
    *inlen = in - base;
321
20.2k
    return(*outlen);
322
20.2k
}
323
324
/**
325
 * UTF8ToUTF8:
326
 * @out:  a pointer to an array of bytes to store the result
327
 * @outlen:  the length of @out
328
 * @inb:  a pointer to an array of UTF-8 chars
329
 * @inlenb:  the length of @in in UTF-8 chars
330
 *
331
 * No op copy operation for UTF8 handling.
332
 *
333
 * Returns the number of bytes written or an XML_ENC_ERR code.
334
 *
335
 *     The value of *inlen after return is the number of octets consumed
336
 *     if the return value is positive, else unpredictable.
337
 */
338
static int
339
UTF8ToUTF8(unsigned char* out, int *outlen,
340
           const unsigned char* inb, int *inlenb)
341
0
{
342
0
    int len;
343
344
0
    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
345
0
  return(XML_ENC_ERR_INTERNAL);
346
0
    if (inb == NULL) {
347
        /* inb == NULL means output is initialized. */
348
0
        *outlen = 0;
349
0
        *inlenb = 0;
350
0
        return(0);
351
0
    }
352
0
    if (*outlen > *inlenb) {
353
0
  len = *inlenb;
354
0
    } else {
355
0
  len = *outlen;
356
0
    }
357
0
    if (len < 0)
358
0
  return(XML_ENC_ERR_INTERNAL);
359
360
    /*
361
     * FIXME: Conversion functions must assure valid UTF-8, so we have
362
     * to check for UTF-8 validity. Preferably, this converter shouldn't
363
     * be used at all.
364
     */
365
0
    memcpy(out, inb, len);
366
367
0
    *outlen = len;
368
0
    *inlenb = len;
369
0
    return(*outlen);
370
0
}
371
372
373
#ifdef LIBXML_OUTPUT_ENABLED
374
/**
375
 * UTF8Toisolat1:
376
 * @out:  a pointer to an array of bytes to store the result
377
 * @outlen:  the length of @out
378
 * @in:  a pointer to an array of UTF-8 chars
379
 * @inlen:  the length of @in
380
 *
381
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
382
 * block of chars out.
383
 *
384
 * Returns the number of bytes written or an XML_ENC_ERR code.
385
 *
386
 * The value of @inlen after return is the number of octets consumed
387
 *     if the return value is positive, else unpredictable.
388
 * The value of @outlen after return is the number of octets produced.
389
 */
390
int
391
UTF8Toisolat1(unsigned char* out, int *outlen,
392
36.7k
              const unsigned char* in, int *inlen) {
393
36.7k
    const unsigned char* processed = in;
394
36.7k
    const unsigned char* outend;
395
36.7k
    const unsigned char* outstart = out;
396
36.7k
    const unsigned char* instart = in;
397
36.7k
    const unsigned char* inend;
398
36.7k
    unsigned int c, d;
399
36.7k
    int trailing;
400
401
36.7k
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
402
7
        return(XML_ENC_ERR_INTERNAL);
403
36.7k
    if (in == NULL) {
404
        /*
405
   * initialization nothing to do
406
   */
407
88
  *outlen = 0;
408
88
  *inlen = 0;
409
88
  return(0);
410
88
    }
411
36.6k
    inend = in + (*inlen);
412
36.6k
    outend = out + (*outlen);
413
7.91M
    while (in < inend) {
414
7.89M
  d = *in++;
415
7.89M
  if      (d < 0x80)  { c= d; trailing= 0; }
416
3.78M
  else if (d < 0xC0) {
417
      /* trailing byte in leading position */
418
0
      *outlen = out - outstart;
419
0
      *inlen = processed - instart;
420
0
      return(XML_ENC_ERR_INPUT);
421
3.78M
        } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
422
3.59k
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
423
311
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
424
0
  else {
425
      /* no chance for this in IsoLat1 */
426
0
      *outlen = out - outstart;
427
0
      *inlen = processed - instart;
428
0
      return(XML_ENC_ERR_INPUT);
429
0
  }
430
431
7.89M
  if (inend - in < trailing) {
432
449
      break;
433
449
  }
434
435
11.6M
  for ( ; trailing; trailing--) {
436
3.79M
      if (in >= inend)
437
0
    break;
438
3.79M
      if (((d= *in++) & 0xC0) != 0x80) {
439
0
    *outlen = out - outstart;
440
0
    *inlen = processed - instart;
441
0
    return(XML_ENC_ERR_INPUT);
442
0
      }
443
3.79M
      c <<= 6;
444
3.79M
      c |= d & 0x3F;
445
3.79M
  }
446
447
  /* assertion: c is a single UTF-4 value */
448
7.89M
  if (c <= 0xFF) {
449
7.88M
      if (out >= outend)
450
0
    break;
451
7.88M
      *out++ = c;
452
7.88M
  } else {
453
      /* no chance for this in IsoLat1 */
454
17.4k
      *outlen = out - outstart;
455
17.4k
      *inlen = processed - instart;
456
17.4k
      return(XML_ENC_ERR_INPUT);
457
17.4k
  }
458
7.88M
  processed = in;
459
7.88M
    }
460
19.2k
    *outlen = out - outstart;
461
19.2k
    *inlen = processed - instart;
462
19.2k
    return(*outlen);
463
36.6k
}
464
#endif /* LIBXML_OUTPUT_ENABLED */
465
466
/**
467
 * UTF16LEToUTF8:
468
 * @out:  a pointer to an array of bytes to store the result
469
 * @outlen:  the length of @out
470
 * @inb:  a pointer to an array of UTF-16LE passwd as a byte array
471
 * @inlenb:  the length of @in in UTF-16LE chars
472
 *
473
 * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8
474
 * block of chars out. This function assumes the endian property
475
 * is the same between the native type of this machine and the
476
 * inputed one.
477
 *
478
 * Returns the number of bytes written or an XML_ENC_ERR code.
479
 *
480
 * The value of *inlen after return is the number of octets consumed
481
 * if the return value is positive, else unpredictable.
482
 */
483
static int
484
UTF16LEToUTF8(unsigned char* out, int *outlen,
485
            const unsigned char* inb, int *inlenb)
486
12.0k
{
487
12.0k
    unsigned char* outstart = out;
488
12.0k
    const unsigned char* processed = inb;
489
12.0k
    unsigned char* outend;
490
12.0k
    unsigned short* in = (unsigned short*) inb;
491
12.0k
    unsigned short* inend;
492
12.0k
    unsigned int c, d, inlen;
493
12.0k
    unsigned char *tmp;
494
12.0k
    int bits;
495
496
12.0k
    if (*outlen == 0) {
497
0
        *inlenb = 0;
498
0
        return(0);
499
0
    }
500
12.0k
    outend = out + *outlen;
501
12.0k
    if ((*inlenb % 2) == 1)
502
10.9k
        (*inlenb)--;
503
12.0k
    inlen = *inlenb / 2;
504
12.0k
    inend = in + inlen;
505
66.1k
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
506
56.8k
        if (xmlLittleEndian) {
507
56.8k
      c= *in++;
508
56.8k
  } else {
509
0
      tmp = (unsigned char *) in;
510
0
      c = *tmp++;
511
0
      c = c | (*tmp << 8);
512
0
      in++;
513
0
  }
514
56.8k
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
515
3.02k
      if (in >= inend) {           /* handle split mutli-byte characters */
516
2.36k
    break;
517
2.36k
      }
518
657
      if (xmlLittleEndian) {
519
657
    d = *in++;
520
657
      } else {
521
0
    tmp = (unsigned char *) in;
522
0
    d = *tmp++;
523
0
    d = d | (*tmp << 8);
524
0
    in++;
525
0
      }
526
657
            if ((d & 0xFC00) == 0xDC00) {
527
241
                c &= 0x03FF;
528
241
                c <<= 10;
529
241
                c |= d & 0x03FF;
530
241
                c += 0x10000;
531
241
            }
532
416
            else {
533
416
    *outlen = out - outstart;
534
416
    *inlenb = processed - inb;
535
416
          return(XML_ENC_ERR_INPUT);
536
416
      }
537
657
        }
538
539
  /* assertion: c is a single UTF-4 value */
540
54.0k
        if (out >= outend)
541
0
      break;
542
54.0k
        if      (c <    0x80) {  *out++=  c;                bits= -6; }
543
49.2k
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
544
48.4k
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
545
241
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
546
547
151k
        for ( ; bits >= 0; bits-= 6) {
548
97.8k
            if (out >= outend)
549
0
          break;
550
97.8k
            *out++= ((c >> bits) & 0x3F) | 0x80;
551
97.8k
        }
552
54.0k
  processed = (const unsigned char*) in;
553
54.0k
    }
554
11.6k
    *outlen = out - outstart;
555
11.6k
    *inlenb = processed - inb;
556
11.6k
    return(*outlen);
557
12.0k
}
558
559
#ifdef LIBXML_OUTPUT_ENABLED
560
/**
561
 * UTF8ToUTF16LE:
562
 * @outb:  a pointer to an array of bytes to store the result
563
 * @outlen:  the length of @outb
564
 * @in:  a pointer to an array of UTF-8 chars
565
 * @inlen:  the length of @in
566
 *
567
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
568
 * block of chars out.
569
 *
570
 * Returns the number of bytes written or an XML_ENC_ERR code.
571
 */
572
static int
573
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
574
            const unsigned char* in, int *inlen)
575
596
{
576
596
    unsigned short* out = (unsigned short*) outb;
577
596
    const unsigned char* processed = in;
578
596
    const unsigned char *const instart = in;
579
596
    unsigned short* outstart= out;
580
596
    unsigned short* outend;
581
596
    const unsigned char* inend;
582
596
    unsigned int c, d;
583
596
    int trailing;
584
596
    unsigned char *tmp;
585
596
    unsigned short tmp1, tmp2;
586
587
    /* UTF16LE encoding has no BOM */
588
596
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
589
2
        return(XML_ENC_ERR_INTERNAL);
590
594
    if (in == NULL) {
591
53
  *outlen = 0;
592
53
  *inlen = 0;
593
53
  return(0);
594
53
    }
595
541
    inend= in + *inlen;
596
541
    outend = out + (*outlen / 2);
597
1.64M
    while (in < inend) {
598
1.64M
      d= *in++;
599
1.64M
      if      (d < 0x80)  { c= d; trailing= 0; }
600
399k
      else if (d < 0xC0) {
601
          /* trailing byte in leading position */
602
0
    *outlen = (out - outstart) * 2;
603
0
    *inlen = processed - instart;
604
0
    return(XML_ENC_ERR_INPUT);
605
399k
      } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
606
791
      else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
607
75
      else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
608
0
      else {
609
  /* no chance for this in UTF-16 */
610
0
  *outlen = (out - outstart) * 2;
611
0
  *inlen = processed - instart;
612
0
  return(XML_ENC_ERR_INPUT);
613
0
      }
614
615
1.64M
      if (inend - in < trailing) {
616
103
          break;
617
103
      }
618
619
2.04M
      for ( ; trailing; trailing--) {
620
399k
          if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
621
0
        break;
622
399k
          c <<= 6;
623
399k
          c |= d & 0x3F;
624
399k
      }
625
626
      /* assertion: c is a single UTF-4 value */
627
1.64M
        if (c < 0x10000) {
628
1.64M
            if (out >= outend)
629
0
          break;
630
1.64M
      if (xmlLittleEndian) {
631
1.64M
    *out++ = c;
632
1.64M
      } else {
633
0
    tmp = (unsigned char *) out;
634
0
    *tmp = (unsigned char) c; /* Explicit truncation */
635
0
    *(tmp + 1) = c >> 8 ;
636
0
    out++;
637
0
      }
638
1.64M
        }
639
75
        else if (c < 0x110000) {
640
75
            if (out+1 >= outend)
641
0
          break;
642
75
            c -= 0x10000;
643
75
      if (xmlLittleEndian) {
644
75
    *out++ = 0xD800 | (c >> 10);
645
75
    *out++ = 0xDC00 | (c & 0x03FF);
646
75
      } else {
647
0
    tmp1 = 0xD800 | (c >> 10);
648
0
    tmp = (unsigned char *) out;
649
0
    *tmp = (unsigned char) tmp1; /* Explicit truncation */
650
0
    *(tmp + 1) = tmp1 >> 8;
651
0
    out++;
652
653
0
    tmp2 = 0xDC00 | (c & 0x03FF);
654
0
    tmp = (unsigned char *) out;
655
0
    *tmp  = (unsigned char) tmp2; /* Explicit truncation */
656
0
    *(tmp + 1) = tmp2 >> 8;
657
0
    out++;
658
0
      }
659
75
        }
660
0
        else
661
0
      break;
662
1.64M
  processed = in;
663
1.64M
    }
664
541
    *outlen = (out - outstart) * 2;
665
541
    *inlen = processed - instart;
666
541
    return(*outlen);
667
541
}
668
669
/**
 * UTF8ToUTF16:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars, or NULL for an
 *       initialization call
 * @inlen:  the length of @in
 *
 * Convert a block of UTF-8 to UTF-16. Real input is handed to
 * UTF8ToUTF16LE(). An initialization call (@in == NULL) writes the
 * little-endian byte order mark FF FE when @outb has room for it.
 *
 * Returns the number of bytes written or an XML_ENC_ERR code.
 */
static int
UTF8ToUTF16(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    if (in != NULL)
        return (UTF8ToUTF16LE(outb, outlen, in, inlen));

    /*
     * Initialization: emit the Byte Order Mark for UTF-16LE, provided
     * there is room for it.
     */
    if (*outlen < 2) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    outb[0] = 0xFF;
    outb[1] = 0xFE;
    *outlen = 2;
    *inlen = 0;
#ifdef DEBUG_ENCODING
    xmlGenericError(xmlGenericErrorContext,
        "Added FFFE Byte Order Mark\n");
#endif
    return(2);
}
706
#endif /* LIBXML_OUTPUT_ENABLED */
707
708
/**
709
 * UTF16BEToUTF8:
710
 * @out:  a pointer to an array of bytes to store the result
711
 * @outlen:  the length of @out
712
 * @inb:  a pointer to an array of UTF-16 passed as a byte array
713
 * @inlenb:  the length of @in in UTF-16 chars
714
 *
715
 * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
716
 * block of chars out. This function assumes the endian property
717
 * is the same between the native type of this machine and the
718
 * inputed one.
719
 *
720
 * Returns the number of bytes written or an XML_ENC_ERR code.
721
 *
722
 * The value of *inlen after return is the number of octets consumed
723
 * if the return value is positive, else unpredictable.
724
 */
725
static int
726
UTF16BEToUTF8(unsigned char* out, int *outlen,
727
            const unsigned char* inb, int *inlenb)
728
34.6k
{
729
34.6k
    unsigned char* outstart = out;
730
34.6k
    const unsigned char* processed = inb;
731
34.6k
    unsigned char* outend;
732
34.6k
    unsigned short* in = (unsigned short*) inb;
733
34.6k
    unsigned short* inend;
734
34.6k
    unsigned int c, d, inlen;
735
34.6k
    unsigned char *tmp;
736
34.6k
    int bits;
737
738
34.6k
    if (*outlen == 0) {
739
0
        *inlenb = 0;
740
0
        return(0);
741
0
    }
742
34.6k
    outend = out + *outlen;
743
34.6k
    if ((*inlenb % 2) == 1)
744
33.1k
        (*inlenb)--;
745
34.6k
    inlen = *inlenb / 2;
746
34.6k
    inend= in + inlen;
747
1.64M
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
748
1.61M
  if (xmlLittleEndian) {
749
1.61M
      tmp = (unsigned char *) in;
750
1.61M
      c = *tmp++;
751
1.61M
      c = (c << 8) | *tmp;
752
1.61M
      in++;
753
1.61M
  } else {
754
0
      c= *in++;
755
0
  }
756
1.61M
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
757
2.01k
      if (in >= inend) {           /* handle split mutli-byte characters */
758
778
                break;
759
778
      }
760
1.24k
      if (xmlLittleEndian) {
761
1.24k
    tmp = (unsigned char *) in;
762
1.24k
    d = *tmp++;
763
1.24k
    d = (d << 8) | *tmp;
764
1.24k
    in++;
765
1.24k
      } else {
766
0
    d= *in++;
767
0
      }
768
1.24k
            if ((d & 0xFC00) == 0xDC00) {
769
394
                c &= 0x03FF;
770
394
                c <<= 10;
771
394
                c |= d & 0x03FF;
772
394
                c += 0x10000;
773
394
            }
774
847
            else {
775
847
    *outlen = out - outstart;
776
847
    *inlenb = processed - inb;
777
847
          return(XML_ENC_ERR_INPUT);
778
847
      }
779
1.24k
        }
780
781
  /* assertion: c is a single UTF-4 value */
782
1.61M
        if (out >= outend)
783
0
      break;
784
1.61M
        if      (c <    0x80) {  *out++=  c;                bits= -6; }
785
1.59M
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
786
1.58M
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
787
394
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
788
789
4.79M
        for ( ; bits >= 0; bits-= 6) {
790
3.18M
            if (out >= outend)
791
0
          break;
792
3.18M
            *out++= ((c >> bits) & 0x3F) | 0x80;
793
3.18M
        }
794
1.61M
  processed = (const unsigned char*) in;
795
1.61M
    }
796
33.8k
    *outlen = out - outstart;
797
33.8k
    *inlenb = processed - inb;
798
33.8k
    return(*outlen);
799
34.6k
}
800
801
#ifdef LIBXML_OUTPUT_ENABLED
802
/**
803
 * UTF8ToUTF16BE:
804
 * @outb:  a pointer to an array of bytes to store the result
805
 * @outlen:  the length of @outb
806
 * @in:  a pointer to an array of UTF-8 chars
807
 * @inlen:  the length of @in
808
 *
809
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
810
 * block of chars out.
811
 *
812
 * Returns the number of bytes written or an XML_ENC_ERR code.
813
 */
814
static int
815
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
816
            const unsigned char* in, int *inlen)
817
682
{
818
682
    unsigned short* out = (unsigned short*) outb;
819
682
    const unsigned char* processed = in;
820
682
    const unsigned char *const instart = in;
821
682
    unsigned short* outstart= out;
822
682
    unsigned short* outend;
823
682
    const unsigned char* inend;
824
682
    unsigned int c, d;
825
682
    int trailing;
826
682
    unsigned char *tmp;
827
682
    unsigned short tmp1, tmp2;
828
829
    /* UTF-16BE has no BOM */
830
682
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
831
2
        return(XML_ENC_ERR_INTERNAL);
832
680
    if (in == NULL) {
833
71
  *outlen = 0;
834
71
  *inlen = 0;
835
71
  return(0);
836
71
    }
837
609
    inend= in + *inlen;
838
609
    outend = out + (*outlen / 2);
839
1.92M
    while (in < inend) {
840
1.92M
      d= *in++;
841
1.92M
      if      (d < 0x80)  { c= d; trailing= 0; }
842
1.47M
      else if (d < 0xC0)  {
843
          /* trailing byte in leading position */
844
0
    *outlen = out - outstart;
845
0
    *inlen = processed - instart;
846
0
    return(XML_ENC_ERR_INPUT);
847
1.47M
      } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
848
357
      else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
849
25
      else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
850
0
      else {
851
          /* no chance for this in UTF-16 */
852
0
    *outlen = out - outstart;
853
0
    *inlen = processed - instart;
854
0
    return(XML_ENC_ERR_INPUT);
855
0
      }
856
857
1.92M
      if (inend - in < trailing) {
858
352
          break;
859
352
      }
860
861
3.39M
      for ( ; trailing; trailing--) {
862
1.47M
          if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))  break;
863
1.47M
          c <<= 6;
864
1.47M
          c |= d & 0x3F;
865
1.47M
      }
866
867
      /* assertion: c is a single UTF-4 value */
868
1.92M
        if (c < 0x10000) {
869
1.92M
            if (out >= outend)  break;
870
1.92M
      if (xmlLittleEndian) {
871
1.92M
    tmp = (unsigned char *) out;
872
1.92M
    *tmp = c >> 8;
873
1.92M
    *(tmp + 1) = (unsigned char) c; /* Explicit truncation */
874
1.92M
    out++;
875
1.92M
      } else {
876
0
    *out++ = c;
877
0
      }
878
1.92M
        }
879
25
        else if (c < 0x110000) {
880
25
            if (out+1 >= outend)  break;
881
25
            c -= 0x10000;
882
25
      if (xmlLittleEndian) {
883
25
    tmp1 = 0xD800 | (c >> 10);
884
25
    tmp = (unsigned char *) out;
885
25
    *tmp = tmp1 >> 8;
886
25
    *(tmp + 1) = (unsigned char) tmp1; /* Explicit truncation */
887
25
    out++;
888
889
25
    tmp2 = 0xDC00 | (c & 0x03FF);
890
25
    tmp = (unsigned char *) out;
891
25
    *tmp = tmp2 >> 8;
892
25
    *(tmp + 1) = (unsigned char) tmp2; /* Explicit truncation */
893
25
    out++;
894
25
      } else {
895
0
    *out++ = 0xD800 | (c >> 10);
896
0
    *out++ = 0xDC00 | (c & 0x03FF);
897
0
      }
898
25
        }
899
0
        else
900
0
      break;
901
1.92M
  processed = in;
902
1.92M
    }
903
609
    *outlen = (out - outstart) * 2;
904
609
    *inlen = processed - instart;
905
609
    return(*outlen);
906
609
}
907
#endif /* LIBXML_OUTPUT_ENABLED */
908
909
/************************************************************************
910
 *                  *
911
 *    Generic encoding handling routines      *
912
 *                  *
913
 ************************************************************************/
914
915
/**
916
 * xmlDetectCharEncoding:
917
 * @in:  a pointer to the first bytes of the XML entity, must be at least
918
 *       2 bytes long (at least 4 if encoding is UTF4 variant).
919
 * @len:  pointer to the length of the buffer
920
 *
921
 * Guess the encoding of the entity using the first bytes of the entity content
922
 * according to the non-normative appendix F of the XML-1.0 recommendation.
923
 *
924
 * Returns one of the XML_CHAR_ENCODING_... values.
925
 */
926
xmlCharEncoding
927
xmlDetectCharEncoding(const unsigned char* in, int len)
928
359k
{
929
359k
    if (in == NULL)
930
0
        return(XML_CHAR_ENCODING_NONE);
931
359k
    if (len >= 4) {
932
359k
  if ((in[0] == 0x00) && (in[1] == 0x00) &&
933
359k
      (in[2] == 0x00) && (in[3] == 0x3C))
934
211
      return(XML_CHAR_ENCODING_UCS4BE);
935
359k
  if ((in[0] == 0x3C) && (in[1] == 0x00) &&
936
359k
      (in[2] == 0x00) && (in[3] == 0x00))
937
247
      return(XML_CHAR_ENCODING_UCS4LE);
938
359k
  if ((in[0] == 0x00) && (in[1] == 0x00) &&
939
359k
      (in[2] == 0x3C) && (in[3] == 0x00))
940
210
      return(XML_CHAR_ENCODING_UCS4_2143);
941
359k
  if ((in[0] == 0x00) && (in[1] == 0x3C) &&
942
359k
      (in[2] == 0x00) && (in[3] == 0x00))
943
212
      return(XML_CHAR_ENCODING_UCS4_3412);
944
358k
  if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
945
358k
      (in[2] == 0xA7) && (in[3] == 0x94))
946
2.43k
      return(XML_CHAR_ENCODING_EBCDIC);
947
356k
  if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
948
356k
      (in[2] == 0x78) && (in[3] == 0x6D))
949
217k
      return(XML_CHAR_ENCODING_UTF8);
950
  /*
951
   * Although not part of the recommendation, we also
952
   * attempt an "auto-recognition" of UTF-16LE and
953
   * UTF-16BE encodings.
954
   */
955
138k
  if ((in[0] == 0x3C) && (in[1] == 0x00) &&
956
138k
      (in[2] == 0x3F) && (in[3] == 0x00))
957
506
      return(XML_CHAR_ENCODING_UTF16LE);
958
138k
  if ((in[0] == 0x00) && (in[1] == 0x3C) &&
959
138k
      (in[2] == 0x00) && (in[3] == 0x3F))
960
1.33k
      return(XML_CHAR_ENCODING_UTF16BE);
961
138k
    }
962
136k
    if (len >= 3) {
963
  /*
964
   * Errata on XML-1.0 June 20 2001
965
   * We now allow an UTF8 encoded BOM
966
   */
967
136k
  if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
968
136k
      (in[2] == 0xBF))
969
12.2k
      return(XML_CHAR_ENCODING_UTF8);
970
136k
    }
971
    /* For UTF-16 we can recognize by the BOM */
972
124k
    if (len >= 2) {
973
124k
  if ((in[0] == 0xFE) && (in[1] == 0xFF))
974
351
      return(XML_CHAR_ENCODING_UTF16BE);
975
124k
  if ((in[0] == 0xFF) && (in[1] == 0xFE))
976
653
      return(XML_CHAR_ENCODING_UTF16LE);
977
124k
    }
978
123k
    return(XML_CHAR_ENCODING_NONE);
979
124k
}
980
981
/**
982
 * xmlCleanupEncodingAliases:
983
 *
984
 * Unregisters all aliases
985
 */
986
void
987
0
xmlCleanupEncodingAliases(void) {
988
0
    int i;
989
990
0
    if (xmlCharEncodingAliases == NULL)
991
0
  return;
992
993
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
994
0
  if (xmlCharEncodingAliases[i].name != NULL)
995
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
996
0
  if (xmlCharEncodingAliases[i].alias != NULL)
997
0
      xmlFree((char *) xmlCharEncodingAliases[i].alias);
998
0
    }
999
0
    xmlCharEncodingAliasesNb = 0;
1000
0
    xmlCharEncodingAliasesMax = 0;
1001
0
    xmlFree(xmlCharEncodingAliases);
1002
0
    xmlCharEncodingAliases = NULL;
1003
0
}
1004
1005
/**
1006
 * xmlGetEncodingAlias:
1007
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1008
 *
1009
 * Lookup an encoding name for the given alias.
1010
 *
1011
 * Returns NULL if not found, otherwise the original name
1012
 */
1013
const char *
1014
193k
xmlGetEncodingAlias(const char *alias) {
1015
193k
    int i;
1016
193k
    char upper[100];
1017
1018
193k
    if (alias == NULL)
1019
0
  return(NULL);
1020
1021
193k
    if (xmlCharEncodingAliases == NULL)
1022
193k
  return(NULL);
1023
1024
0
    for (i = 0;i < 99;i++) {
1025
0
        upper[i] = (char) toupper((unsigned char) alias[i]);
1026
0
  if (upper[i] == 0) break;
1027
0
    }
1028
0
    upper[i] = 0;
1029
1030
    /*
1031
     * Walk down the list looking for a definition of the alias
1032
     */
1033
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1034
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1035
0
      return(xmlCharEncodingAliases[i].name);
1036
0
  }
1037
0
    }
1038
0
    return(NULL);
1039
0
}
1040
1041
/**
1042
 * xmlAddEncodingAlias:
1043
 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1044
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1045
 *
1046
 * Registers an alias @alias for an encoding named @name. Existing alias
1047
 * will be overwritten.
1048
 *
1049
 * Returns 0 in case of success, -1 in case of error
1050
 */
1051
int
1052
0
xmlAddEncodingAlias(const char *name, const char *alias) {
1053
0
    int i;
1054
0
    char upper[100];
1055
0
    char *nameCopy, *aliasCopy;
1056
1057
0
    if ((name == NULL) || (alias == NULL))
1058
0
  return(-1);
1059
1060
0
    for (i = 0;i < 99;i++) {
1061
0
        upper[i] = (char) toupper((unsigned char) alias[i]);
1062
0
  if (upper[i] == 0) break;
1063
0
    }
1064
0
    upper[i] = 0;
1065
1066
0
    if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1067
0
        xmlCharEncodingAliasPtr tmp;
1068
0
        size_t newSize = xmlCharEncodingAliasesMax ?
1069
0
                         xmlCharEncodingAliasesMax * 2 :
1070
0
                         20;
1071
1072
0
        tmp = (xmlCharEncodingAliasPtr)
1073
0
              xmlRealloc(xmlCharEncodingAliases,
1074
0
                         newSize * sizeof(xmlCharEncodingAlias));
1075
0
        if (tmp == NULL)
1076
0
            return(-1);
1077
0
        xmlCharEncodingAliases = tmp;
1078
0
        xmlCharEncodingAliasesMax = newSize;
1079
0
    }
1080
1081
    /*
1082
     * Walk down the list looking for a definition of the alias
1083
     */
1084
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1085
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1086
      /*
1087
       * Replace the definition.
1088
       */
1089
0
      nameCopy = xmlMemStrdup(name);
1090
0
            if (nameCopy == NULL)
1091
0
                return(-1);
1092
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1093
0
      xmlCharEncodingAliases[i].name = nameCopy;
1094
0
      return(0);
1095
0
  }
1096
0
    }
1097
    /*
1098
     * Add the definition
1099
     */
1100
0
    nameCopy = xmlMemStrdup(name);
1101
0
    if (nameCopy == NULL)
1102
0
        return(-1);
1103
0
    aliasCopy = xmlMemStrdup(upper);
1104
0
    if (aliasCopy == NULL) {
1105
0
        xmlFree(nameCopy);
1106
0
        return(-1);
1107
0
    }
1108
0
    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = nameCopy;
1109
0
    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = aliasCopy;
1110
0
    xmlCharEncodingAliasesNb++;
1111
0
    return(0);
1112
0
}
1113
1114
/**
1115
 * xmlDelEncodingAlias:
1116
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1117
 *
1118
 * Unregisters an encoding alias @alias
1119
 *
1120
 * Returns 0 in case of success, -1 in case of error
1121
 */
1122
int
1123
0
xmlDelEncodingAlias(const char *alias) {
1124
0
    int i;
1125
1126
0
    if (alias == NULL)
1127
0
  return(-1);
1128
1129
0
    if (xmlCharEncodingAliases == NULL)
1130
0
  return(-1);
1131
    /*
1132
     * Walk down the list looking for a definition of the alias
1133
     */
1134
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1135
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1136
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1137
0
      xmlFree((char *) xmlCharEncodingAliases[i].alias);
1138
0
      xmlCharEncodingAliasesNb--;
1139
0
      memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1140
0
        sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1141
0
      return(0);
1142
0
  }
1143
0
    }
1144
0
    return(-1);
1145
0
}
1146
1147
/**
1148
 * xmlParseCharEncoding:
1149
 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1150
 *
1151
 * Compare the string to the encoding schemes already known. Note
1152
 * that the comparison is case insensitive accordingly to the section
1153
 * [XML] 4.3.3 Character Encoding in Entities.
1154
 *
1155
 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1156
 * if not recognized.
1157
 */
1158
xmlCharEncoding
1159
xmlParseCharEncoding(const char* name)
1160
7.18k
{
1161
7.18k
    const char *alias;
1162
7.18k
    char upper[500];
1163
7.18k
    int i;
1164
1165
7.18k
    if (name == NULL)
1166
0
  return(XML_CHAR_ENCODING_NONE);
1167
1168
    /*
1169
     * Do the alias resolution
1170
     */
1171
7.18k
    alias = xmlGetEncodingAlias(name);
1172
7.18k
    if (alias != NULL)
1173
0
  name = alias;
1174
1175
204k
    for (i = 0;i < 499;i++) {
1176
204k
        upper[i] = (char) toupper((unsigned char) name[i]);
1177
204k
  if (upper[i] == 0) break;
1178
204k
    }
1179
7.18k
    upper[i] = 0;
1180
1181
7.18k
    if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1182
7.18k
    if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1183
7.18k
    if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1184
1185
    /*
1186
     * NOTE: if we were able to parse this, the endianness of UTF16 is
1187
     *       already found and in use
1188
     */
1189
7.18k
    if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1190
7.18k
    if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1191
1192
7.18k
    if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1193
7.18k
    if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1194
7.18k
    if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1195
1196
    /*
1197
     * NOTE: if we were able to parse this, the endianness of UCS4 is
1198
     *       already found and in use
1199
     */
1200
7.18k
    if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1201
6.72k
    if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1202
6.72k
    if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1203
1204
1205
6.72k
    if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1206
6.72k
    if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1207
6.49k
    if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1208
1209
6.49k
    if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1210
6.49k
    if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1211
6.22k
    if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1212
1213
6.22k
    if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1214
6.22k
    if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1215
6.22k
    if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1216
6.22k
    if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1217
6.22k
    if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1218
6.22k
    if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1219
6.22k
    if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1220
1221
6.22k
    if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1222
6.22k
    if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1223
6.22k
    if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1224
1225
#ifdef DEBUG_ENCODING
1226
    xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1227
#endif
1228
6.22k
    return(XML_CHAR_ENCODING_ERROR);
1229
6.22k
}
1230
1231
/**
1232
 * xmlGetCharEncodingName:
1233
 * @enc:  the encoding
1234
 *
1235
 * The "canonical" name for XML encoding.
1236
 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1237
 * Section 4.3.3  Character Encoding in Entities
1238
 *
1239
 * Returns the canonical name for the given encoding
1240
 */
1241
1242
const char*
1243
1.64k
xmlGetCharEncodingName(xmlCharEncoding enc) {
1244
1.64k
    switch (enc) {
1245
0
        case XML_CHAR_ENCODING_ERROR:
1246
0
      return(NULL);
1247
0
        case XML_CHAR_ENCODING_NONE:
1248
0
      return(NULL);
1249
0
        case XML_CHAR_ENCODING_UTF8:
1250
0
      return("UTF-8");
1251
0
        case XML_CHAR_ENCODING_UTF16LE:
1252
0
      return("UTF-16");
1253
0
        case XML_CHAR_ENCODING_UTF16BE:
1254
0
      return("UTF-16");
1255
253
        case XML_CHAR_ENCODING_EBCDIC:
1256
253
            return("EBCDIC");
1257
459
        case XML_CHAR_ENCODING_UCS4LE:
1258
459
            return("ISO-10646-UCS-4");
1259
1
        case XML_CHAR_ENCODING_UCS4BE:
1260
1
            return("ISO-10646-UCS-4");
1261
210
        case XML_CHAR_ENCODING_UCS4_2143:
1262
210
            return("ISO-10646-UCS-4");
1263
212
        case XML_CHAR_ENCODING_UCS4_3412:
1264
212
            return("ISO-10646-UCS-4");
1265
3
        case XML_CHAR_ENCODING_UCS2:
1266
3
            return("ISO-10646-UCS-2");
1267
234
        case XML_CHAR_ENCODING_8859_1:
1268
234
      return("ISO-8859-1");
1269
270
        case XML_CHAR_ENCODING_8859_2:
1270
270
      return("ISO-8859-2");
1271
0
        case XML_CHAR_ENCODING_8859_3:
1272
0
      return("ISO-8859-3");
1273
0
        case XML_CHAR_ENCODING_8859_4:
1274
0
      return("ISO-8859-4");
1275
0
        case XML_CHAR_ENCODING_8859_5:
1276
0
      return("ISO-8859-5");
1277
0
        case XML_CHAR_ENCODING_8859_6:
1278
0
      return("ISO-8859-6");
1279
0
        case XML_CHAR_ENCODING_8859_7:
1280
0
      return("ISO-8859-7");
1281
0
        case XML_CHAR_ENCODING_8859_8:
1282
0
      return("ISO-8859-8");
1283
0
        case XML_CHAR_ENCODING_8859_9:
1284
0
      return("ISO-8859-9");
1285
0
        case XML_CHAR_ENCODING_2022_JP:
1286
0
            return("ISO-2022-JP");
1287
0
        case XML_CHAR_ENCODING_SHIFT_JIS:
1288
0
            return("Shift-JIS");
1289
0
        case XML_CHAR_ENCODING_EUC_JP:
1290
0
            return("EUC-JP");
1291
0
  case XML_CHAR_ENCODING_ASCII:
1292
0
      return(NULL);
1293
1.64k
    }
1294
0
    return(NULL);
1295
1.64k
}
1296
1297
/************************************************************************
1298
 *                  *
1299
 *      Char encoding handlers        *
1300
 *                  *
1301
 ************************************************************************/
1302
1303
/*
 * Forward declarations of the built-in ISO-8859-x converters. They are
 * only compiled in when neither iconv nor ICU is enabled and
 * LIBXML_ISO8859X_ENABLED is defined.
 */
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) && \
1304
    defined(LIBXML_ISO8859X_ENABLED)
1305
1306
/*
 * DECLARE_ISO_FUNCS(n) declares the static converter pair for
 * ISO-8859-n: ISO8859_nToUTF8 and UTF8ToISO8859_n.
 */
#define DECLARE_ISO_FUNCS(n) \
1307
    static int ISO8859_##n##ToUTF8(unsigned char* out, int *outlen, \
1308
                                   const unsigned char* in, int *inlen); \
1309
    static int UTF8ToISO8859_##n(unsigned char* out, int *outlen, \
1310
                                 const unsigned char* in, int *inlen);
1311
1312
/** DOC_DISABLE */
1313
DECLARE_ISO_FUNCS(2)
1314
DECLARE_ISO_FUNCS(3)
1315
DECLARE_ISO_FUNCS(4)
1316
DECLARE_ISO_FUNCS(5)
1317
DECLARE_ISO_FUNCS(6)
1318
DECLARE_ISO_FUNCS(7)
1319
DECLARE_ISO_FUNCS(8)
1320
DECLARE_ISO_FUNCS(9)
1321
DECLARE_ISO_FUNCS(10)
1322
DECLARE_ISO_FUNCS(11)
1323
DECLARE_ISO_FUNCS(13)
1324
DECLARE_ISO_FUNCS(14)
1325
DECLARE_ISO_FUNCS(15)
1326
DECLARE_ISO_FUNCS(16)
1327
/** DOC_ENABLE */
1328
1329
#endif /* LIBXML_ISO8859X_ENABLED */
1330
1331
/*
 * EMPTY_ICONV expands to two extra zero initializers when iconv support
 * is compiled in, and to nothing otherwise.
 */
#ifdef LIBXML_ICONV_ENABLED
1332
  #define EMPTY_ICONV , (iconv_t) 0, (iconv_t) 0
1333
#else
1334
  #define EMPTY_ICONV
1335
#endif
1336
1337
/*
 * EMPTY_UCONV expands to two extra NULL initializers when ICU support
 * is compiled in, and to nothing otherwise.
 */
#ifdef LIBXML_ICU_ENABLED
1338
  #define EMPTY_UCONV , NULL, NULL
1339
#else
1340
  #define EMPTY_UCONV
1341
#endif
1342
1343
/*
 * MAKE_HANDLER builds one brace-enclosed initializer: name, input
 * function, output function, followed by whatever EMPTY_ICONV and
 * EMPTY_UCONV expand to in this build.
 */
#define MAKE_HANDLER(name, in, out) \
1344
    { (char *) name, in, out EMPTY_ICONV EMPTY_UCONV }
1345
1346
/*
 * Table of the built-in, statically allocated encoding handlers.
 * The order of the first entries matters: xmlUTF16LEHandler and
 * xmlUTF16BEHandler are defined as &defaultHandlers[1] and
 * &defaultHandlers[2].
 * Without LIBXML_OUTPUT_ENABLED the same names are listed with a NULL
 * output function.
 */
static const xmlCharEncodingHandler defaultHandlers[] = {
1347
    MAKE_HANDLER("UTF-8", UTF8ToUTF8, UTF8ToUTF8)
1348
#ifdef LIBXML_OUTPUT_ENABLED
1349
    ,MAKE_HANDLER("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE)
1350
    ,MAKE_HANDLER("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE)
1351
    ,MAKE_HANDLER("UTF-16", UTF16LEToUTF8, UTF8ToUTF16)
1352
    ,MAKE_HANDLER("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1)
1353
    ,MAKE_HANDLER("ASCII", asciiToUTF8, UTF8Toascii)
1354
    ,MAKE_HANDLER("US-ASCII", asciiToUTF8, UTF8Toascii)
1355
#ifdef LIBXML_HTML_ENABLED
1356
    ,MAKE_HANDLER("HTML", NULL, UTF8ToHtml)
1357
#endif
1358
#else
1359
    ,MAKE_HANDLER("UTF-16LE", UTF16LEToUTF8, NULL)
1360
    ,MAKE_HANDLER("UTF-16BE", UTF16BEToUTF8, NULL)
1361
    ,MAKE_HANDLER("UTF-16", UTF16LEToUTF8, NULL)
1362
    ,MAKE_HANDLER("ISO-8859-1", isolat1ToUTF8, NULL)
1363
    ,MAKE_HANDLER("ASCII", asciiToUTF8, NULL)
1364
    ,MAKE_HANDLER("US-ASCII", asciiToUTF8, NULL)
1365
#endif /* LIBXML_OUTPUT_ENABLED */
1366
1367
/* Built-in ISO-8859-x converters, only without iconv and ICU. */
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) && \
1368
    defined(LIBXML_ISO8859X_ENABLED)
1369
    ,MAKE_HANDLER("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2)
1370
    ,MAKE_HANDLER("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3)
1371
    ,MAKE_HANDLER("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4)
1372
    ,MAKE_HANDLER("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5)
1373
    ,MAKE_HANDLER("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6)
1374
    ,MAKE_HANDLER("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7)
1375
    ,MAKE_HANDLER("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8)
1376
    ,MAKE_HANDLER("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9)
1377
    ,MAKE_HANDLER("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10)
1378
    ,MAKE_HANDLER("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11)
1379
    ,MAKE_HANDLER("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13)
1380
    ,MAKE_HANDLER("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14)
1381
    ,MAKE_HANDLER("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15)
1382
    ,MAKE_HANDLER("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16)
1383
#endif
1384
};
1385
1386
/* Number of entries in defaultHandlers (a size_t expression). */
#define NUM_DEFAULT_HANDLERS \
1387
3.13M
    (sizeof(defaultHandlers) / sizeof(defaultHandlers[0]))
1388
1389
/* Shortcuts to the "UTF-16LE" and "UTF-16BE" entries of defaultHandlers. */
static const xmlCharEncodingHandler *xmlUTF16LEHandler = &defaultHandlers[1];
1390
static const xmlCharEncodingHandler *xmlUTF16BEHandler = &defaultHandlers[2];
1391
1392
/* the size should be growable, but it's not a big deal ... */
1393
0
#define MAX_ENCODING_HANDLERS 50
1394
/*
 * Table of handlers registered at run time, allocated on first
 * registration with room for MAX_ENCODING_HANDLERS entries.
 */
static xmlCharEncodingHandlerPtr *handlers = NULL;
1395
/* Number of entries of the handlers table currently in use. */
static int nbCharEncodingHandler = 0;
1396
1397
/**
1398
 * xmlNewCharEncodingHandler:
1399
 * @name:  the encoding name, in UTF-8 format (ASCII actually)
1400
 * @input:  the xmlCharEncodingInputFunc to read that encoding
1401
 * @output:  the xmlCharEncodingOutputFunc to write that encoding
1402
 *
1403
 * Create and registers an xmlCharEncodingHandler.
1404
 *
1405
 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1406
 */
1407
xmlCharEncodingHandlerPtr
1408
xmlNewCharEncodingHandler(const char *name,
1409
                          xmlCharEncodingInputFunc input,
1410
0
                          xmlCharEncodingOutputFunc output) {
1411
0
    xmlCharEncodingHandlerPtr handler;
1412
0
    const char *alias;
1413
0
    char upper[500];
1414
0
    int i;
1415
0
    char *up = NULL;
1416
1417
    /*
1418
     * Do the alias resolution
1419
     */
1420
0
    alias = xmlGetEncodingAlias(name);
1421
0
    if (alias != NULL)
1422
0
  name = alias;
1423
1424
    /*
1425
     * Keep only the uppercase version of the encoding.
1426
     */
1427
0
    if (name == NULL)
1428
0
  return(NULL);
1429
0
    for (i = 0;i < 499;i++) {
1430
0
        upper[i] = (char) toupper((unsigned char) name[i]);
1431
0
  if (upper[i] == 0) break;
1432
0
    }
1433
0
    upper[i] = 0;
1434
0
    up = xmlMemStrdup(upper);
1435
0
    if (up == NULL)
1436
0
  return(NULL);
1437
1438
    /*
1439
     * allocate and fill-up an handler block.
1440
     */
1441
0
    handler = (xmlCharEncodingHandlerPtr)
1442
0
              xmlMalloc(sizeof(xmlCharEncodingHandler));
1443
0
    if (handler == NULL) {
1444
0
        xmlFree(up);
1445
0
  return(NULL);
1446
0
    }
1447
0
    memset(handler, 0, sizeof(xmlCharEncodingHandler));
1448
0
    handler->input = input;
1449
0
    handler->output = output;
1450
0
    handler->name = up;
1451
1452
0
#ifdef LIBXML_ICONV_ENABLED
1453
0
    handler->iconv_in = NULL;
1454
0
    handler->iconv_out = NULL;
1455
0
#endif
1456
#ifdef LIBXML_ICU_ENABLED
1457
    handler->uconv_in = NULL;
1458
    handler->uconv_out = NULL;
1459
#endif
1460
1461
    /*
1462
     * registers and returns the handler.
1463
     */
1464
0
    xmlRegisterCharEncodingHandler(handler);
1465
#ifdef DEBUG_ENCODING
1466
    xmlGenericError(xmlGenericErrorContext,
1467
      "Registered encoding handler for %s\n", name);
1468
#endif
1469
0
    return(handler);
1470
0
}
1471
1472
/**
 * xmlInitCharEncodingHandlers:
 *
 * DEPRECATED: Alias for xmlInitParser.
 *
 * Kept as a public entry point; it does nothing but forward to
 * xmlInitParser.
 */
void
xmlInitCharEncodingHandlers(void)
{
    xmlInitParser();
}
1481
1482
/**
1483
 * xmlInitEncodingInternal:
1484
 *
1485
 * Initialize the char encoding support.
1486
 */
1487
void
1488
2
xmlInitEncodingInternal(void) {
1489
2
    unsigned short int tst = 0x1234;
1490
2
    unsigned char *ptr = (unsigned char *) &tst;
1491
1492
2
    if (*ptr == 0x12) xmlLittleEndian = 0;
1493
2
    else xmlLittleEndian = 1;
1494
2
}
1495
1496
/**
1497
 * xmlCleanupCharEncodingHandlers:
1498
 *
1499
 * DEPRECATED: This function will be made private. Call xmlCleanupParser
1500
 * to free global state but see the warnings there. xmlCleanupParser
1501
 * should be only called once at program exit. In most cases, you don't
1502
 * have call cleanup functions at all.
1503
 *
1504
 * Cleanup the memory allocated for the char encoding support, it
1505
 * unregisters all the encoding handlers and the aliases.
1506
 */
1507
void
1508
0
xmlCleanupCharEncodingHandlers(void) {
1509
0
    xmlCleanupEncodingAliases();
1510
1511
0
    if (handlers == NULL) return;
1512
1513
0
    for (;nbCharEncodingHandler > 0;) {
1514
0
        nbCharEncodingHandler--;
1515
0
  if (handlers[nbCharEncodingHandler] != NULL) {
1516
0
      if (handlers[nbCharEncodingHandler]->name != NULL)
1517
0
    xmlFree(handlers[nbCharEncodingHandler]->name);
1518
0
      xmlFree(handlers[nbCharEncodingHandler]);
1519
0
  }
1520
0
    }
1521
0
    xmlFree(handlers);
1522
0
    handlers = NULL;
1523
0
    nbCharEncodingHandler = 0;
1524
0
}
1525
1526
/**
1527
 * xmlRegisterCharEncodingHandler:
1528
 * @handler:  the xmlCharEncodingHandlerPtr handler block
1529
 *
1530
 * Register the char encoding handler, surprising, isn't it ?
1531
 */
1532
void
1533
0
xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1534
0
    if (handler == NULL)
1535
0
        return;
1536
0
    if (handlers == NULL) {
1537
0
        handlers = xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(handlers[0]));
1538
0
        if (handlers == NULL)
1539
0
            goto free_handler;
1540
0
    }
1541
1542
0
    if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS)
1543
0
        goto free_handler;
1544
0
    handlers[nbCharEncodingHandler++] = handler;
1545
0
    return;
1546
1547
0
free_handler:
1548
0
    if (handler != NULL) {
1549
0
        if (handler->name != NULL) {
1550
0
            xmlFree(handler->name);
1551
0
        }
1552
0
        xmlFree(handler);
1553
0
    }
1554
0
}
1555
1556
/**
1557
 * xmlGetCharEncodingHandler:
1558
 * @enc:  an xmlCharEncoding value.
1559
 *
1560
 * Search in the registered set the handler able to read/write that encoding.
1561
 *
1562
 * Returns the handler or NULL if not found
1563
 */
1564
xmlCharEncodingHandlerPtr
1565
371k
xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1566
371k
    xmlCharEncodingHandlerPtr handler;
1567
1568
371k
    switch (enc) {
1569
0
        case XML_CHAR_ENCODING_ERROR:
1570
0
      return(NULL);
1571
365k
        case XML_CHAR_ENCODING_NONE:
1572
365k
      return(NULL);
1573
0
        case XML_CHAR_ENCODING_UTF8:
1574
0
      return(NULL);
1575
1.15k
        case XML_CHAR_ENCODING_UTF16LE:
1576
1.15k
      return((xmlCharEncodingHandlerPtr) xmlUTF16LEHandler);
1577
1.69k
        case XML_CHAR_ENCODING_UTF16BE:
1578
1.69k
      return((xmlCharEncodingHandlerPtr) xmlUTF16BEHandler);
1579
2.43k
        case XML_CHAR_ENCODING_EBCDIC:
1580
2.43k
            handler = xmlFindCharEncodingHandler("EBCDIC");
1581
2.43k
            if (handler != NULL) return(handler);
1582
2.43k
            handler = xmlFindCharEncodingHandler("ebcdic");
1583
2.43k
            if (handler != NULL) return(handler);
1584
2.43k
            handler = xmlFindCharEncodingHandler("EBCDIC-US");
1585
2.43k
            if (handler != NULL) return(handler);
1586
1
            handler = xmlFindCharEncodingHandler("IBM-037");
1587
1
            if (handler != NULL) return(handler);
1588
1
      break;
1589
211
        case XML_CHAR_ENCODING_UCS4BE:
1590
211
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1591
211
            if (handler != NULL) return(handler);
1592
211
            handler = xmlFindCharEncodingHandler("UCS-4");
1593
211
            if (handler != NULL) return(handler);
1594
1
            handler = xmlFindCharEncodingHandler("UCS4");
1595
1
            if (handler != NULL) return(handler);
1596
1
      break;
1597
247
        case XML_CHAR_ENCODING_UCS4LE:
1598
247
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1599
247
            if (handler != NULL) return(handler);
1600
247
            handler = xmlFindCharEncodingHandler("UCS-4");
1601
247
            if (handler != NULL) return(handler);
1602
1
            handler = xmlFindCharEncodingHandler("UCS4");
1603
1
            if (handler != NULL) return(handler);
1604
1
      break;
1605
210
        case XML_CHAR_ENCODING_UCS4_2143:
1606
210
      break;
1607
212
        case XML_CHAR_ENCODING_UCS4_3412:
1608
212
      break;
1609
0
        case XML_CHAR_ENCODING_UCS2:
1610
0
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1611
0
            if (handler != NULL) return(handler);
1612
0
            handler = xmlFindCharEncodingHandler("UCS-2");
1613
0
            if (handler != NULL) return(handler);
1614
0
            handler = xmlFindCharEncodingHandler("UCS2");
1615
0
            if (handler != NULL) return(handler);
1616
0
      break;
1617
1618
      /*
1619
       * We used to keep ISO Latin encodings native in the
1620
       * generated data. This led to so many problems that
1621
       * this has been removed. One can still change this
1622
       * back by registering no-ops encoders for those
1623
       */
1624
0
        case XML_CHAR_ENCODING_8859_1:
1625
0
      handler = xmlFindCharEncodingHandler("ISO-8859-1");
1626
0
      if (handler != NULL) return(handler);
1627
0
      break;
1628
0
        case XML_CHAR_ENCODING_8859_2:
1629
0
      handler = xmlFindCharEncodingHandler("ISO-8859-2");
1630
0
      if (handler != NULL) return(handler);
1631
0
      break;
1632
0
        case XML_CHAR_ENCODING_8859_3:
1633
0
      handler = xmlFindCharEncodingHandler("ISO-8859-3");
1634
0
      if (handler != NULL) return(handler);
1635
0
      break;
1636
0
        case XML_CHAR_ENCODING_8859_4:
1637
0
      handler = xmlFindCharEncodingHandler("ISO-8859-4");
1638
0
      if (handler != NULL) return(handler);
1639
0
      break;
1640
0
        case XML_CHAR_ENCODING_8859_5:
1641
0
      handler = xmlFindCharEncodingHandler("ISO-8859-5");
1642
0
      if (handler != NULL) return(handler);
1643
0
      break;
1644
0
        case XML_CHAR_ENCODING_8859_6:
1645
0
      handler = xmlFindCharEncodingHandler("ISO-8859-6");
1646
0
      if (handler != NULL) return(handler);
1647
0
      break;
1648
0
        case XML_CHAR_ENCODING_8859_7:
1649
0
      handler = xmlFindCharEncodingHandler("ISO-8859-7");
1650
0
      if (handler != NULL) return(handler);
1651
0
      break;
1652
0
        case XML_CHAR_ENCODING_8859_8:
1653
0
      handler = xmlFindCharEncodingHandler("ISO-8859-8");
1654
0
      if (handler != NULL) return(handler);
1655
0
      break;
1656
0
        case XML_CHAR_ENCODING_8859_9:
1657
0
      handler = xmlFindCharEncodingHandler("ISO-8859-9");
1658
0
      if (handler != NULL) return(handler);
1659
0
      break;
1660
1661
1662
0
        case XML_CHAR_ENCODING_2022_JP:
1663
0
            handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1664
0
            if (handler != NULL) return(handler);
1665
0
      break;
1666
0
        case XML_CHAR_ENCODING_SHIFT_JIS:
1667
0
            handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1668
0
            if (handler != NULL) return(handler);
1669
0
            handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1670
0
            if (handler != NULL) return(handler);
1671
0
            handler = xmlFindCharEncodingHandler("Shift_JIS");
1672
0
            if (handler != NULL) return(handler);
1673
0
      break;
1674
0
        case XML_CHAR_ENCODING_EUC_JP:
1675
0
            handler = xmlFindCharEncodingHandler("EUC-JP");
1676
0
            if (handler != NULL) return(handler);
1677
0
      break;
1678
0
  default:
1679
0
      break;
1680
371k
    }
1681
1682
#ifdef DEBUG_ENCODING
1683
    xmlGenericError(xmlGenericErrorContext,
1684
      "No handler found for encoding %d\n", enc);
1685
#endif
1686
425
    return(NULL);
1687
371k
}
1688
1689
/**
1690
 * xmlFindCharEncodingHandler:
1691
 * @name:  a string describing the char encoding.
1692
 *
1693
 * Search in the registered set the handler able to read/write that encoding
1694
 * or create a new one.
1695
 *
1696
 * Returns the handler or NULL if not found
1697
 */
1698
xmlCharEncodingHandlerPtr
1699
187k
xmlFindCharEncodingHandler(const char *name) {
1700
187k
    const char *nalias;
1701
187k
    const char *norig;
1702
187k
    xmlCharEncoding alias;
1703
187k
#ifdef LIBXML_ICONV_ENABLED
1704
187k
    xmlCharEncodingHandlerPtr enc;
1705
187k
    iconv_t icv_in, icv_out;
1706
187k
#endif /* LIBXML_ICONV_ENABLED */
1707
#ifdef LIBXML_ICU_ENABLED
1708
    xmlCharEncodingHandlerPtr encu;
1709
    uconv_t *ucv_in, *ucv_out;
1710
#endif /* LIBXML_ICU_ENABLED */
1711
187k
    char upper[100];
1712
187k
    int i;
1713
1714
187k
    if (name == NULL) return(NULL);
1715
187k
    if (name[0] == 0) return(NULL);
1716
1717
    /*
1718
     * Do the alias resolution
1719
     */
1720
186k
    norig = name;
1721
186k
    nalias = xmlGetEncodingAlias(name);
1722
186k
    if (nalias != NULL)
1723
0
  name = nalias;
1724
1725
    /*
1726
     * Check first for directly registered encoding names
1727
     */
1728
1.29M
    for (i = 0;i < 99;i++) {
1729
1.29M
        upper[i] = (char) toupper((unsigned char) name[i]);
1730
1.29M
  if (upper[i] == 0) break;
1731
1.29M
    }
1732
186k
    upper[i] = 0;
1733
1734
1.59M
    for (i = 0; i < (int) NUM_DEFAULT_HANDLERS; i++) {
1735
1.42M
        if (strcmp(upper, defaultHandlers[i].name) == 0)
1736
21.4k
            return((xmlCharEncodingHandlerPtr) &defaultHandlers[i]);
1737
1.42M
    }
1738
1739
165k
    if (handlers != NULL) {
1740
0
        for (i = 0;i < nbCharEncodingHandler; i++) {
1741
0
            if (!strcmp(upper, handlers[i]->name)) {
1742
#ifdef DEBUG_ENCODING
1743
                xmlGenericError(xmlGenericErrorContext,
1744
                        "Found registered handler for encoding %s\n", name);
1745
#endif
1746
0
                return(handlers[i]);
1747
0
            }
1748
0
        }
1749
0
    }
1750
1751
165k
#ifdef LIBXML_ICONV_ENABLED
1752
    /* check whether iconv can handle this */
1753
165k
    icv_in = iconv_open("UTF-8", name);
1754
165k
    icv_out = iconv_open(name, "UTF-8");
1755
165k
    if (icv_in == (iconv_t) -1) {
1756
7.18k
        icv_in = iconv_open("UTF-8", upper);
1757
7.18k
    }
1758
165k
    if (icv_out == (iconv_t) -1) {
1759
7.18k
  icv_out = iconv_open(upper, "UTF-8");
1760
7.18k
    }
1761
165k
    if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1762
158k
      enc = (xmlCharEncodingHandlerPtr)
1763
158k
            xmlMalloc(sizeof(xmlCharEncodingHandler));
1764
158k
      if (enc == NULL) {
1765
21
          iconv_close(icv_in);
1766
21
          iconv_close(icv_out);
1767
21
    return(NULL);
1768
21
      }
1769
158k
            memset(enc, 0, sizeof(xmlCharEncodingHandler));
1770
158k
      enc->name = xmlMemStrdup(name);
1771
158k
            if (enc->name == NULL) {
1772
16
                xmlFree(enc);
1773
16
                iconv_close(icv_in);
1774
16
                iconv_close(icv_out);
1775
16
                return(NULL);
1776
16
            }
1777
158k
      enc->input = NULL;
1778
158k
      enc->output = NULL;
1779
158k
      enc->iconv_in = icv_in;
1780
158k
      enc->iconv_out = icv_out;
1781
#ifdef DEBUG_ENCODING
1782
            xmlGenericError(xmlGenericErrorContext,
1783
        "Found iconv handler for encoding %s\n", name);
1784
#endif
1785
158k
      return enc;
1786
158k
    } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1787
0
      if (icv_in != (iconv_t) -1)
1788
0
    iconv_close(icv_in);
1789
0
      else
1790
0
    iconv_close(icv_out);
1791
0
    }
1792
7.18k
#endif /* LIBXML_ICONV_ENABLED */
1793
#ifdef LIBXML_ICU_ENABLED
1794
    /* check whether icu can handle this */
1795
    ucv_in = openIcuConverter(name, 1);
1796
    ucv_out = openIcuConverter(name, 0);
1797
    if (ucv_in != NULL && ucv_out != NULL) {
1798
      encu = (xmlCharEncodingHandlerPtr)
1799
             xmlMalloc(sizeof(xmlCharEncodingHandler));
1800
      if (encu == NULL) {
1801
                closeIcuConverter(ucv_in);
1802
                closeIcuConverter(ucv_out);
1803
    return(NULL);
1804
      }
1805
            memset(encu, 0, sizeof(xmlCharEncodingHandler));
1806
      encu->name = xmlMemStrdup(name);
1807
            if (encu->name == NULL) {
1808
                xmlFree(encu);
1809
                closeIcuConverter(ucv_in);
1810
                closeIcuConverter(ucv_out);
1811
                return(NULL);
1812
            }
1813
      encu->input = NULL;
1814
      encu->output = NULL;
1815
      encu->uconv_in = ucv_in;
1816
      encu->uconv_out = ucv_out;
1817
#ifdef DEBUG_ENCODING
1818
            xmlGenericError(xmlGenericErrorContext,
1819
        "Found ICU converter handler for encoding %s\n", name);
1820
#endif
1821
      return encu;
1822
    } else if (ucv_in != NULL || ucv_out != NULL) {
1823
            closeIcuConverter(ucv_in);
1824
            closeIcuConverter(ucv_out);
1825
    }
1826
#endif /* LIBXML_ICU_ENABLED */
1827
1828
#ifdef DEBUG_ENCODING
1829
    xmlGenericError(xmlGenericErrorContext,
1830
      "No handler found for encoding %s\n", name);
1831
#endif
1832
1833
    /*
1834
     * Fallback using the canonical names
1835
     */
1836
7.18k
    alias = xmlParseCharEncoding(norig);
1837
7.18k
    if (alias != XML_CHAR_ENCODING_ERROR) {
1838
965
        const char* canon;
1839
965
        canon = xmlGetCharEncodingName(alias);
1840
965
        if ((canon != NULL) && (strcmp(name, canon))) {
1841
505
      return(xmlFindCharEncodingHandler(canon));
1842
505
        }
1843
965
    }
1844
1845
    /* If "none of the above", give up */
1846
6.68k
    return(NULL);
1847
7.18k
}
1848
1849
/************************************************************************
1850
 *                  *
1851
 *    ICONV based generic conversion functions    *
1852
 *                  *
1853
 ************************************************************************/
1854
1855
#ifdef LIBXML_ICONV_ENABLED
1856
/**
1857
 * xmlIconvWrapper:
1858
 * @cd:   iconv converter data structure
1859
 * @out:  a pointer to an array of bytes to store the result
1860
 * @outlen:  the length of @out
1861
 * @in:  a pointer to an array of input bytes
1862
 * @inlen:  the length of @in
1863
 *
1864
 * Returns an XML_ENC_ERR code.
1865
 *
1866
 * The value of @inlen after return is the number of octets consumed
1867
 *     as the return value is positive, else unpredictable.
1868
 * The value of @outlen after return is the number of octets produced.
1869
 */
1870
static int
1871
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
1872
15.6M
                const unsigned char *in, int *inlen) {
1873
15.6M
    size_t icv_inlen, icv_outlen;
1874
15.6M
    const char *icv_in = (const char *) in;
1875
15.6M
    char *icv_out = (char *) out;
1876
15.6M
    size_t ret;
1877
1878
15.6M
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1879
83
        if (outlen != NULL) *outlen = 0;
1880
83
        return(XML_ENC_ERR_INTERNAL);
1881
83
    }
1882
15.6M
    icv_inlen = *inlen;
1883
15.6M
    icv_outlen = *outlen;
1884
    /*
1885
     * Some versions take const, other versions take non-const input.
1886
     */
1887
15.6M
    ret = iconv(cd, (void *) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
1888
15.6M
    *inlen -= icv_inlen;
1889
15.6M
    *outlen -= icv_outlen;
1890
15.6M
    if (ret == (size_t) -1) {
1891
7.75M
        if (errno == EILSEQ)
1892
7.73M
            return(XML_ENC_ERR_INPUT);
1893
25.9k
        if (errno == E2BIG)
1894
9.66k
            return(XML_ENC_ERR_SPACE);
1895
16.2k
        if (errno == EINVAL)
1896
16.2k
            return(XML_ENC_ERR_PARTIAL);
1897
0
        return(XML_ENC_ERR_INTERNAL);
1898
16.2k
    }
1899
7.85M
    return(XML_ENC_ERR_SUCCESS);
1900
15.6M
}
1901
#endif /* LIBXML_ICONV_ENABLED */
1902
1903
/************************************************************************
1904
 *                  *
1905
 *    ICU based generic conversion functions    *
1906
 *                  *
1907
 ************************************************************************/
1908
1909
#ifdef LIBXML_ICU_ENABLED
1910
/**
1911
 * xmlUconvWrapper:
1912
 * @cd: ICU uconverter data structure
1913
 * @toUnicode : non-zero if toUnicode. 0 otherwise.
1914
 * @out:  a pointer to an array of bytes to store the result
1915
 * @outlen:  the length of @out
1916
 * @in:  a pointer to an array of input bytes
1917
 * @inlen:  the length of @in
1918
 * @flush: if true, indicates end of input
1919
 *
1920
 * Returns an XML_ENC_ERR code.
1921
 *
1922
 * The value of @inlen after return is the number of octets consumed
1923
 *     as the return value is positive, else unpredictable.
1924
 * The value of @outlen after return is the number of octets produced.
1925
 */
1926
static int
1927
xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
1928
                const unsigned char *in, int *inlen, int flush) {
1929
    const char *ucv_in = (const char *) in;
1930
    char *ucv_out = (char *) out;
1931
    UErrorCode err = U_ZERO_ERROR;
1932
1933
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1934
        if (outlen != NULL) *outlen = 0;
1935
        return(XML_ENC_ERR_INTERNAL);
1936
    }
1937
1938
    if (toUnicode) {
1939
        /* encoding => UTF-16 => UTF-8 */
1940
        ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
1941
                       &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1942
                       &cd->pivot_source, &cd->pivot_target,
1943
                       cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1944
    } else {
1945
        /* UTF-8 => UTF-16 => encoding */
1946
        ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
1947
                       &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1948
                       &cd->pivot_source, &cd->pivot_target,
1949
                       cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1950
    }
1951
    *inlen = ucv_in - (const char*) in;
1952
    *outlen = ucv_out - (char *) out;
1953
    if (U_SUCCESS(err)) {
1954
        /* reset pivot buf if this is the last call for input (flush==TRUE) */
1955
        if (flush)
1956
            cd->pivot_source = cd->pivot_target = cd->pivot_buf;
1957
        return(XML_ENC_ERR_SUCCESS);
1958
    }
1959
    if (err == U_BUFFER_OVERFLOW_ERROR)
1960
        return(XML_ENC_ERR_SPACE);
1961
    if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
1962
        return(XML_ENC_ERR_INPUT);
1963
    return(XML_ENC_ERR_PARTIAL);
1964
}
1965
#endif /* LIBXML_ICU_ENABLED */
1966
1967
/************************************************************************
1968
 *                  *
1969
 *    The real API used by libxml for on-the-fly conversion *
1970
 *                  *
1971
 ************************************************************************/
1972
1973
/**
1974
 * xmlEncConvertError:
1975
 * @code:  XML_ENC_ERR code
1976
 *
1977
 * Convert XML_ENC_ERR to libxml2 error codes.
1978
 */
1979
static int
1980
16.1k
xmlEncConvertError(int code) {
1981
16.1k
    int ret;
1982
1983
16.1k
    switch (code) {
1984
0
        case XML_ENC_ERR_SUCCESS:
1985
0
            ret = XML_ERR_OK;
1986
0
            break;
1987
16.1k
        case XML_ENC_ERR_INPUT:
1988
16.1k
            ret = XML_ERR_INVALID_ENCODING;
1989
16.1k
            break;
1990
0
        case XML_ENC_ERR_MEMORY:
1991
0
            ret = XML_ERR_NO_MEMORY;
1992
0
            break;
1993
22
        default:
1994
22
            ret = XML_ERR_INTERNAL_ERROR;
1995
22
            break;
1996
16.1k
    }
1997
1998
16.1k
    return(ret);
1999
16.1k
}
2000
2001
/**
2002
 * xmlEncInputChunk:
2003
 * @handler:  encoding handler
2004
 * @out:  a pointer to an array of bytes to store the result
2005
 * @outlen:  the length of @out
2006
 * @in:  a pointer to an array of input bytes
2007
 * @inlen:  the length of @in
2008
 * @flush:  flush (ICU-related)
2009
 *
2010
 * Returns an XML_ENC_ERR code.
2011
 *
2012
 * The value of @inlen after return is the number of octets consumed
2013
 *     as the return value is 0, else unpredictable.
2014
 * The value of @outlen after return is the number of octets produced.
2015
 */
2016
int
2017
xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2018
262k
                 int *outlen, const unsigned char *in, int *inlen, int flush) {
2019
262k
    int ret;
2020
262k
    (void)flush;
2021
2022
262k
    if (handler->input != NULL) {
2023
67.4k
        ret = handler->input(out, outlen, in, inlen);
2024
67.4k
        if (ret > 0)
2025
23.0k
           ret = XML_ENC_ERR_SUCCESS;
2026
67.4k
    }
2027
194k
#ifdef LIBXML_ICONV_ENABLED
2028
194k
    else if (handler->iconv_in != NULL) {
2029
194k
        ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
2030
194k
    }
2031
3
#endif /* LIBXML_ICONV_ENABLED */
2032
#ifdef LIBXML_ICU_ENABLED
2033
    else if (handler->uconv_in != NULL) {
2034
        ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen,
2035
                              flush);
2036
    }
2037
#endif /* LIBXML_ICU_ENABLED */
2038
3
    else {
2039
3
        *outlen = 0;
2040
3
        *inlen = 0;
2041
3
        ret = XML_ENC_ERR_INTERNAL;
2042
3
    }
2043
2044
    /* Ignore space and partial errors when reading. */
2045
262k
    if ((ret == XML_ENC_ERR_SPACE) || (ret == XML_ENC_ERR_PARTIAL))
2046
13.9k
        ret = XML_ENC_ERR_SUCCESS;
2047
2048
262k
    return(ret);
2049
262k
}
2050
2051
/**
2052
 * xmlEncOutputChunk:
2053
 * @handler:  encoding handler
2054
 * @out:  a pointer to an array of bytes to store the result
2055
 * @outlen:  the length of @out
2056
 * @in:  a pointer to an array of input bytes
2057
 * @inlen:  the length of @in
2058
 *
2059
 * Returns an XML_ENC_ERR code.
2060
 *
2061
 * The value of @inlen after return is the number of octets consumed
2062
 *     as the return value is 0, else unpredictable.
2063
 * The value of @outlen after return is the number of octets produced.
2064
 */
2065
static int
2066
xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2067
15.9M
                  int *outlen, const unsigned char *in, int *inlen) {
2068
15.9M
    int ret;
2069
2070
15.9M
    if (handler->output != NULL) {
2071
505k
        ret = handler->output(out, outlen, in, inlen);
2072
505k
        if (ret > 0)
2073
253k
           ret = XML_ENC_ERR_SUCCESS;
2074
505k
    }
2075
15.4M
#ifdef LIBXML_ICONV_ENABLED
2076
15.4M
    else if (handler->iconv_out != NULL) {
2077
15.4M
        ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
2078
15.4M
    }
2079
0
#endif /* LIBXML_ICONV_ENABLED */
2080
#ifdef LIBXML_ICU_ENABLED
2081
    else if (handler->uconv_out != NULL) {
2082
        ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen,
2083
                              1);
2084
    }
2085
#endif /* LIBXML_ICU_ENABLED */
2086
0
    else {
2087
0
        *outlen = 0;
2088
0
        *inlen = 0;
2089
0
        ret = XML_ENC_ERR_INTERNAL;
2090
0
    }
2091
2092
    /* We shouldn't generate partial sequences when writing. */
2093
15.9M
    if (ret == XML_ENC_ERR_PARTIAL)
2094
12.0k
        ret = XML_ENC_ERR_INTERNAL;
2095
2096
15.9M
    return(ret);
2097
15.9M
}
2098
2099
/**
2100
 * xmlCharEncFirstLine:
2101
 * @handler:   char encoding transformation data structure
2102
 * @out:  an xmlBuffer for the output.
2103
 * @in:  an xmlBuffer for the input
2104
 *
2105
 * DEPERECATED: Don't use.
2106
 */
2107
int
2108
xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2109
0
                    xmlBufferPtr in) {
2110
0
    return(xmlCharEncInFunc(handler, out, in));
2111
0
}
2112
2113
/**
2114
 * xmlCharEncInput:
2115
 * @input: a parser input buffer
2116
 * @flush: try to flush all the raw buffer
2117
 *
2118
 * Generic front-end for the encoding handler on parser input
2119
 *
2120
 * Returns the number of bytes written or an XML_ENC_ERR code.
2121
 */
2122
int
2123
xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
2124
12.2M
{
2125
12.2M
    int ret;
2126
12.2M
    size_t written;
2127
12.2M
    size_t toconv;
2128
12.2M
    int c_in;
2129
12.2M
    int c_out;
2130
12.2M
    xmlBufPtr in;
2131
12.2M
    xmlBufPtr out;
2132
2133
12.2M
    if ((input == NULL) || (input->encoder == NULL) ||
2134
12.2M
        (input->buffer == NULL) || (input->raw == NULL))
2135
35
        return(XML_ENC_ERR_INTERNAL);
2136
12.2M
    out = input->buffer;
2137
12.2M
    in = input->raw;
2138
2139
12.2M
    toconv = xmlBufUse(in);
2140
12.2M
    if (toconv == 0)
2141
11.9M
        return (0);
2142
259k
    if ((toconv > 64 * 1024) && (flush == 0))
2143
0
        toconv = 64 * 1024;
2144
259k
    written = xmlBufAvail(out);
2145
259k
    if (toconv * 2 >= written) {
2146
24.4k
        if (xmlBufGrow(out, toconv * 2) < 0)
2147
10
            return(XML_ENC_ERR_MEMORY);
2148
24.4k
        written = xmlBufAvail(out);
2149
24.4k
    }
2150
259k
    if ((written > 128 * 1024) && (flush == 0))
2151
0
        written = 128 * 1024;
2152
2153
259k
    c_in = toconv;
2154
259k
    c_out = written;
2155
259k
    ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2156
259k
                           xmlBufContent(in), &c_in, flush);
2157
259k
    xmlBufShrink(in, c_in);
2158
259k
    xmlBufAddLen(out, c_out);
2159
2160
259k
    if ((c_out == 0) && (ret != 0)) {
2161
16.1k
        if (input->error == 0)
2162
16.1k
            input->error = xmlEncConvertError(ret);
2163
16.1k
        return(ret);
2164
16.1k
    }
2165
2166
243k
    return (c_out);
2167
259k
}
2168
2169
/**
2170
 * xmlCharEncInFunc:
2171
 * @handler:  char encoding transformation data structure
2172
 * @out:  an xmlBuffer for the output.
2173
 * @in:  an xmlBuffer for the input
2174
 *
2175
 * Generic front-end for the encoding handler input function
2176
 *
2177
 * Returns the number of bytes written or an XML_ENC_ERR code.
2178
 */
2179
int
2180
xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2181
                 xmlBufferPtr in)
2182
0
{
2183
0
    int ret;
2184
0
    int written;
2185
0
    int toconv;
2186
2187
0
    if (handler == NULL)
2188
0
        return(XML_ENC_ERR_INTERNAL);
2189
0
    if (out == NULL)
2190
0
        return(XML_ENC_ERR_INTERNAL);
2191
0
    if (in == NULL)
2192
0
        return(XML_ENC_ERR_INTERNAL);
2193
2194
0
    toconv = in->use;
2195
0
    if (toconv == 0)
2196
0
        return (0);
2197
0
    written = out->size - out->use -1; /* count '\0' */
2198
0
    if (toconv * 2 >= written) {
2199
0
        xmlBufferGrow(out, out->size + toconv * 2);
2200
0
        written = out->size - out->use - 1;
2201
0
    }
2202
0
    ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2203
0
                           in->content, &toconv, 1);
2204
0
    xmlBufferShrink(in, toconv);
2205
0
    out->use += written;
2206
0
    out->content[out->use] = 0;
2207
2208
0
    return (written? written : ret);
2209
0
}
2210
2211
#ifdef LIBXML_OUTPUT_ENABLED
2212
/**
2213
 * xmlCharEncOutput:
2214
 * @output: a parser output buffer
2215
 * @init: is this an initialization call without data
2216
 *
2217
 * Generic front-end for the encoding handler on parser output
2218
 * a first call with @init == 1 has to be made first to initiate the
2219
 * output in case of non-stateless encoding needing to initiate their
2220
 * state or the output (like the BOM in UTF16).
2221
 * In case of UTF8 sequence conversion errors for the given encoder,
2222
 * the content will be automatically remapped to a CharRef sequence.
2223
 *
2224
 * Returns the number of bytes written or an XML_ENC_ERR code.
2225
 */
2226
int
2227
xmlCharEncOutput(xmlOutputBufferPtr output, int init)
2228
40.4k
{
2229
40.4k
    int ret;
2230
40.4k
    size_t written;
2231
40.4k
    int writtentot = 0;
2232
40.4k
    size_t toconv;
2233
40.4k
    int c_in;
2234
40.4k
    int c_out;
2235
40.4k
    xmlBufPtr in;
2236
40.4k
    xmlBufPtr out;
2237
2238
40.4k
    if ((output == NULL) || (output->encoder == NULL) ||
2239
40.4k
        (output->buffer == NULL) || (output->conv == NULL))
2240
0
        return(XML_ENC_ERR_INTERNAL);
2241
40.4k
    out = output->conv;
2242
40.4k
    in = output->buffer;
2243
2244
7.98M
retry:
2245
2246
7.98M
    written = xmlBufAvail(out);
2247
2248
    /*
2249
     * First specific handling of the initialization call
2250
     */
2251
7.98M
    if (init) {
2252
460
        c_in = 0;
2253
460
        c_out = written;
2254
        /* TODO: Check return value. */
2255
460
        xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2256
460
                          NULL, &c_in);
2257
460
        xmlBufAddLen(out, c_out);
2258
#ifdef DEBUG_ENCODING
2259
  xmlGenericError(xmlGenericErrorContext,
2260
    "initialized encoder\n");
2261
#endif
2262
460
        return(c_out);
2263
460
    }
2264
2265
    /*
2266
     * Conversion itself.
2267
     */
2268
7.98M
    toconv = xmlBufUse(in);
2269
7.98M
    if (toconv == 0)
2270
1.55k
        return (writtentot);
2271
7.97M
    if (toconv > 64 * 1024)
2272
175
        toconv = 64 * 1024;
2273
7.97M
    if (toconv * 4 >= written) {
2274
1.26k
        xmlBufGrow(out, toconv * 4);
2275
1.26k
        written = xmlBufAvail(out);
2276
1.26k
    }
2277
7.97M
    if (written > 256 * 1024)
2278
5.93M
        written = 256 * 1024;
2279
2280
7.97M
    c_in = toconv;
2281
7.97M
    c_out = written;
2282
7.97M
    ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2283
7.97M
                            xmlBufContent(in), &c_in);
2284
7.97M
    xmlBufShrink(in, c_in);
2285
7.97M
    xmlBufAddLen(out, c_out);
2286
7.97M
    writtentot += c_out;
2287
2288
7.97M
    if (ret == XML_ENC_ERR_SPACE)
2289
147
        goto retry;
2290
2291
    /*
2292
     * Attempt to handle error cases
2293
     */
2294
7.97M
    if (ret == XML_ENC_ERR_INPUT) {
2295
7.94M
        xmlChar charref[20];
2296
7.94M
        int len = xmlBufUse(in);
2297
7.94M
        xmlChar *content = xmlBufContent(in);
2298
7.94M
        int cur, charrefLen;
2299
2300
7.94M
        cur = xmlGetUTF8Char(content, &len);
2301
7.94M
        if (cur <= 0)
2302
0
            goto error;
2303
2304
#ifdef DEBUG_ENCODING
2305
        xmlGenericError(xmlGenericErrorContext,
2306
                "handling output conversion error\n");
2307
        xmlGenericError(xmlGenericErrorContext,
2308
                "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2309
                content[0], content[1],
2310
                content[2], content[3]);
2311
#endif
2312
        /*
2313
         * Removes the UTF8 sequence, and replace it by a charref
2314
         * and continue the transcoding phase, hoping the error
2315
         * did not mangle the encoder state.
2316
         */
2317
7.94M
        charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2318
7.94M
                         "&#%d;", cur);
2319
7.94M
        xmlBufShrink(in, len);
2320
7.94M
        xmlBufGrow(out, charrefLen * 4);
2321
7.94M
        c_out = xmlBufAvail(out);
2322
7.94M
        c_in = charrefLen;
2323
7.94M
        ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2324
7.94M
                                charref, &c_in);
2325
7.94M
        if ((ret < 0) || (c_in != charrefLen)) {
2326
29
            ret = XML_ENC_ERR_INTERNAL;
2327
29
            goto error;
2328
29
        }
2329
2330
7.94M
        xmlBufAddLen(out, c_out);
2331
7.94M
        writtentot += c_out;
2332
7.94M
        goto retry;
2333
7.94M
    }
2334
2335
38.3k
error:
2336
38.3k
    if ((writtentot <= 0) && (ret != 0)) {
2337
19
        if (output->error == 0)
2338
19
            output->error = xmlEncConvertError(ret);
2339
19
        return(ret);
2340
19
    }
2341
2342
38.3k
    return(writtentot);
2343
38.3k
}
2344
#endif
2345
2346
/**
2347
 * xmlCharEncOutFunc:
2348
 * @handler:  char encoding transformation data structure
2349
 * @out:  an xmlBuffer for the output.
2350
 * @in:  an xmlBuffer for the input
2351
 *
2352
 * Generic front-end for the encoding handler output function
2353
 * a first call with @in == NULL has to be made firs to initiate the
2354
 * output in case of non-stateless encoding needing to initiate their
2355
 * state or the output (like the BOM in UTF16).
2356
 * In case of UTF8 sequence conversion errors for the given encoder,
2357
 * the content will be automatically remapped to a CharRef sequence.
2358
 *
2359
 * Returns the number of bytes written or an XML_ENC_ERR code.
2360
 */
2361
int
2362
xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2363
0
                  xmlBufferPtr in) {
2364
0
    int ret;
2365
0
    int written;
2366
0
    int writtentot = 0;
2367
0
    int toconv;
2368
2369
0
    if (handler == NULL) return(XML_ENC_ERR_INTERNAL);
2370
0
    if (out == NULL) return(XML_ENC_ERR_INTERNAL);
2371
2372
0
retry:
2373
2374
0
    written = out->size - out->use;
2375
2376
0
    if (written > 0)
2377
0
  written--; /* Gennady: count '/0' */
2378
2379
    /*
2380
     * First specific handling of in = NULL, i.e. the initialization call
2381
     */
2382
0
    if (in == NULL) {
2383
0
        toconv = 0;
2384
        /* TODO: Check return value. */
2385
0
        xmlEncOutputChunk(handler, &out->content[out->use], &written,
2386
0
                          NULL, &toconv);
2387
0
        out->use += written;
2388
0
        out->content[out->use] = 0;
2389
#ifdef DEBUG_ENCODING
2390
  xmlGenericError(xmlGenericErrorContext,
2391
    "initialized encoder\n");
2392
#endif
2393
0
        return(0);
2394
0
    }
2395
2396
    /*
2397
     * Conversion itself.
2398
     */
2399
0
    toconv = in->use;
2400
0
    if (toconv == 0)
2401
0
  return(0);
2402
0
    if (toconv * 4 >= written) {
2403
0
        xmlBufferGrow(out, toconv * 4);
2404
0
  written = out->size - out->use - 1;
2405
0
    }
2406
0
    ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2407
0
                            in->content, &toconv);
2408
0
    xmlBufferShrink(in, toconv);
2409
0
    out->use += written;
2410
0
    writtentot += written;
2411
0
    out->content[out->use] = 0;
2412
2413
0
    if (ret == XML_ENC_ERR_SPACE)
2414
0
        goto retry;
2415
2416
    /*
2417
     * Attempt to handle error cases
2418
     */
2419
0
    if (ret == XML_ENC_ERR_INPUT) {
2420
0
        xmlChar charref[20];
2421
0
        int len = in->use;
2422
0
        const xmlChar *utf = (const xmlChar *) in->content;
2423
0
        int cur, charrefLen;
2424
2425
0
        cur = xmlGetUTF8Char(utf, &len);
2426
0
        if (cur <= 0)
2427
0
            return(ret);
2428
2429
#ifdef DEBUG_ENCODING
2430
        xmlGenericError(xmlGenericErrorContext,
2431
                "handling output conversion error\n");
2432
        xmlGenericError(xmlGenericErrorContext,
2433
                "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2434
                in->content[0], in->content[1],
2435
                in->content[2], in->content[3]);
2436
#endif
2437
        /*
2438
         * Removes the UTF8 sequence, and replace it by a charref
2439
         * and continue the transcoding phase, hoping the error
2440
         * did not mangle the encoder state.
2441
         */
2442
0
        charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2443
0
                         "&#%d;", cur);
2444
0
        xmlBufferShrink(in, len);
2445
0
        xmlBufferGrow(out, charrefLen * 4);
2446
0
        written = out->size - out->use - 1;
2447
0
        toconv = charrefLen;
2448
0
        ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2449
0
                                charref, &toconv);
2450
0
        if ((ret < 0) || (toconv != charrefLen))
2451
0
            return(XML_ENC_ERR_INTERNAL);
2452
2453
0
        out->use += written;
2454
0
        writtentot += written;
2455
0
        out->content[out->use] = 0;
2456
0
        goto retry;
2457
0
    }
2458
0
    return(writtentot ? writtentot : ret);
2459
0
}
2460
2461
/**
2462
 * xmlCharEncCloseFunc:
2463
 * @handler:  char encoding transformation data structure
2464
 *
2465
 * Generic front-end for encoding handler close function
2466
 *
2467
 * Returns 0 if success, or -1 in case of error
2468
 */
2469
int
2470
182k
xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2471
182k
    int ret = 0;
2472
182k
    int tofree = 0;
2473
182k
    int i = 0;
2474
2475
182k
    if (handler == NULL) return(-1);
2476
2477
1.53M
    for (i = 0; i < (int) NUM_DEFAULT_HANDLERS; i++) {
2478
1.37M
        if (handler == &defaultHandlers[i])
2479
24.3k
            return(0);
2480
1.37M
    }
2481
2482
158k
    if (handlers != NULL) {
2483
0
        for (i = 0;i < nbCharEncodingHandler; i++) {
2484
0
            if (handler == handlers[i])
2485
0
                return(0);
2486
0
  }
2487
0
    }
2488
158k
#ifdef LIBXML_ICONV_ENABLED
2489
    /*
2490
     * Iconv handlers can be used only once, free the whole block.
2491
     * and the associated icon resources.
2492
     */
2493
158k
    if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
2494
158k
        tofree = 1;
2495
158k
  if (handler->iconv_out != NULL) {
2496
158k
      if (iconv_close(handler->iconv_out))
2497
0
    ret = -1;
2498
158k
      handler->iconv_out = NULL;
2499
158k
  }
2500
158k
  if (handler->iconv_in != NULL) {
2501
158k
      if (iconv_close(handler->iconv_in))
2502
0
    ret = -1;
2503
158k
      handler->iconv_in = NULL;
2504
158k
  }
2505
158k
    }
2506
158k
#endif /* LIBXML_ICONV_ENABLED */
2507
#ifdef LIBXML_ICU_ENABLED
2508
    if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) {
2509
        tofree = 1;
2510
  if (handler->uconv_out != NULL) {
2511
      closeIcuConverter(handler->uconv_out);
2512
      handler->uconv_out = NULL;
2513
  }
2514
  if (handler->uconv_in != NULL) {
2515
      closeIcuConverter(handler->uconv_in);
2516
      handler->uconv_in = NULL;
2517
  }
2518
    }
2519
#endif
2520
158k
    if (tofree) {
2521
        /* free up only dynamic handlers iconv/uconv */
2522
158k
        if (handler->name != NULL)
2523
158k
            xmlFree(handler->name);
2524
158k
        handler->name = NULL;
2525
158k
        xmlFree(handler);
2526
158k
    }
2527
#ifdef DEBUG_ENCODING
2528
    if (ret)
2529
        xmlGenericError(xmlGenericErrorContext,
2530
    "failed to close the encoding handler\n");
2531
    else
2532
        xmlGenericError(xmlGenericErrorContext,
2533
    "closed the encoding handler\n");
2534
#endif
2535
2536
158k
    return(ret);
2537
158k
}
2538
2539
/**
2540
 * xmlByteConsumed:
2541
 * @ctxt: an XML parser context
2542
 *
2543
 * This function provides the current index of the parser relative
2544
 * to the start of the current entity. This function is computed in
2545
 * bytes from the beginning starting at zero and finishing at the
2546
 * size in byte of the file if parsing a file. The function is
2547
 * of constant cost if the input is UTF-8 but can be costly if run
2548
 * on non-UTF-8 input.
2549
 *
2550
 * Returns the index in bytes from the beginning of the entity or -1
2551
 *         in case the index could not be computed.
2552
 */
2553
long
2554
0
xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2555
0
    xmlParserInputPtr in;
2556
2557
0
    if (ctxt == NULL) return(-1);
2558
0
    in = ctxt->input;
2559
0
    if (in == NULL)  return(-1);
2560
0
    if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2561
0
        unsigned int unused = 0;
2562
0
  xmlCharEncodingHandler * handler = in->buf->encoder;
2563
        /*
2564
   * Encoding conversion, compute the number of unused original
2565
   * bytes from the input not consumed and subtract that from
2566
   * the raw consumed value, this is not a cheap operation
2567
   */
2568
0
        if (in->end - in->cur > 0) {
2569
0
      unsigned char convbuf[32000];
2570
0
      const unsigned char *cur = (const unsigned char *)in->cur;
2571
0
      int toconv = in->end - in->cur, written = 32000;
2572
2573
0
      int ret;
2574
2575
0
            do {
2576
0
                toconv = in->end - cur;
2577
0
                written = 32000;
2578
0
                ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2579
0
                                        cur, &toconv);
2580
0
                if ((ret != XML_ENC_ERR_SUCCESS) && (ret != XML_ENC_ERR_SPACE))
2581
0
                    return(-1);
2582
0
                unused += written;
2583
0
                cur += toconv;
2584
0
            } while (ret == XML_ENC_ERR_SPACE);
2585
0
  }
2586
0
  if (in->buf->rawconsumed < unused)
2587
0
      return(-1);
2588
0
  return(in->buf->rawconsumed - unused);
2589
0
    }
2590
0
    return(in->consumed + (in->cur - in->base));
2591
0
}
2592
2593
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2594
#ifdef LIBXML_ISO8859X_ENABLED
2595
2596
/**
2597
 * UTF8ToISO8859x:
2598
 * @out:  a pointer to an array of bytes to store the result
2599
 * @outlen:  the length of @out
2600
 * @in:  a pointer to an array of UTF-8 chars
2601
 * @inlen:  the length of @in
2602
 * @xlattable: the 2-level transcoding table
2603
 *
2604
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
2605
 * block of chars out.
2606
 *
2607
 * Returns the number of bytes written or an XML_ENC_ERR code.
2608
 *
2609
 * The value of @inlen after return is the number of octets consumed
2610
 * as the return value is positive, else unpredictable.
2611
 * The value of @outlen after return is the number of octets consumed.
2612
 */
2613
static int
2614
UTF8ToISO8859x(unsigned char* out, int *outlen,
2615
              const unsigned char* in, int *inlen,
2616
              const unsigned char* const xlattable) {
2617
    const unsigned char* outstart = out;
2618
    const unsigned char* inend;
2619
    const unsigned char* instart = in;
2620
    const unsigned char* processed = in;
2621
2622
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
2623
        (xlattable == NULL))
2624
  return(XML_ENC_ERR_INTERNAL);
2625
    if (in == NULL) {
2626
        /*
2627
        * initialization nothing to do
2628
        */
2629
        *outlen = 0;
2630
        *inlen = 0;
2631
        return(0);
2632
    }
2633
    inend = in + (*inlen);
2634
    while (in < inend) {
2635
        unsigned char d = *in++;
2636
        if  (d < 0x80)  {
2637
            *out++ = d;
2638
        } else if (d < 0xC0) {
2639
            /* trailing byte in leading position */
2640
            *outlen = out - outstart;
2641
            *inlen = processed - instart;
2642
            return(XML_ENC_ERR_INPUT);
2643
        } else if (d < 0xE0) {
2644
            unsigned char c;
2645
            if (!(in < inend)) {
2646
                /* trailing byte not in input buffer */
2647
                *outlen = out - outstart;
2648
                *inlen = processed - instart;
2649
                return(XML_ENC_ERR_PARTIAL);
2650
            }
2651
            c = *in++;
2652
            if ((c & 0xC0) != 0x80) {
2653
                /* not a trailing byte */
2654
                *outlen = out - outstart;
2655
                *inlen = processed - instart;
2656
                return(XML_ENC_ERR_INPUT);
2657
            }
2658
            c = c & 0x3F;
2659
            d = d & 0x1F;
2660
            d = xlattable [48 + c + xlattable [d] * 64];
2661
            if (d == 0) {
2662
                /* not in character set */
2663
                *outlen = out - outstart;
2664
                *inlen = processed - instart;
2665
                return(XML_ENC_ERR_INPUT);
2666
            }
2667
            *out++ = d;
2668
        } else if (d < 0xF0) {
2669
            unsigned char c1;
2670
            unsigned char c2;
2671
            if (!(in < inend - 1)) {
2672
                /* trailing bytes not in input buffer */
2673
                *outlen = out - outstart;
2674
                *inlen = processed - instart;
2675
                return(XML_ENC_ERR_PARTIAL);
2676
            }
2677
            c1 = *in++;
2678
            if ((c1 & 0xC0) != 0x80) {
2679
                /* not a trailing byte (c1) */
2680
                *outlen = out - outstart;
2681
                *inlen = processed - instart;
2682
                return(XML_ENC_ERR_INPUT);
2683
            }
2684
            c2 = *in++;
2685
            if ((c2 & 0xC0) != 0x80) {
2686
                /* not a trailing byte (c2) */
2687
                *outlen = out - outstart;
2688
                *inlen = processed - instart;
2689
                return(XML_ENC_ERR_INPUT);
2690
            }
2691
            c1 = c1 & 0x3F;
2692
            c2 = c2 & 0x3F;
2693
      d = d & 0x0F;
2694
      d = xlattable [48 + c2 + xlattable [48 + c1 +
2695
      xlattable [32 + d] * 64] * 64];
2696
            if (d == 0) {
2697
                /* not in character set */
2698
                *outlen = out - outstart;
2699
                *inlen = processed - instart;
2700
                return(XML_ENC_ERR_INPUT);
2701
            }
2702
            *out++ = d;
2703
        } else {
2704
            /* cannot transcode >= U+010000 */
2705
            *outlen = out - outstart;
2706
            *inlen = processed - instart;
2707
            return(XML_ENC_ERR_INPUT);
2708
        }
2709
        processed = in;
2710
    }
2711
    *outlen = out - outstart;
2712
    *inlen = processed - instart;
2713
    return(*outlen);
2714
}
2715
2716
/**
2717
 * ISO8859xToUTF8
2718
 * @out:  a pointer to an array of bytes to store the result
2719
 * @outlen:  the length of @out
2720
 * @in:  a pointer to an array of ISO Latin 1 chars
2721
 * @inlen:  the length of @in
2722
 *
2723
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
2724
 * block of chars out.
2725
 *
2726
 * Returns the number of bytes written or an XML_ENC_ERR code.
2727
 *
2728
 * The value of @inlen after return is the number of octets consumed
2729
 * The value of @outlen after return is the number of octets produced.
2730
 */
2731
static int
2732
ISO8859xToUTF8(unsigned char* out, int *outlen,
2733
              const unsigned char* in, int *inlen,
2734
              unsigned short const *unicodetable) {
2735
    unsigned char* outstart = out;
2736
    unsigned char* outend;
2737
    const unsigned char* instart = in;
2738
    const unsigned char* inend;
2739
    const unsigned char* instop;
2740
    unsigned int c;
2741
2742
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
2743
        (in == NULL) || (unicodetable == NULL))
2744
  return(XML_ENC_ERR_INTERNAL);
2745
    outend = out + *outlen;
2746
    inend = in + *inlen;
2747
    instop = inend;
2748
2749
    while ((in < inend) && (out < outend - 2)) {
2750
        if (*in >= 0x80) {
2751
            c = unicodetable [*in - 0x80];
2752
            if (c == 0) {
2753
                /* undefined code point */
2754
                *outlen = out - outstart;
2755
                *inlen = in - instart;
2756
                return(XML_ENC_ERR_INPUT);
2757
            }
2758
            if (c < 0x800) {
2759
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
2760
                *out++ = (c & 0x3F) | 0x80;
2761
            } else {
2762
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
2763
                *out++ = ((c >>  6) & 0x3F) | 0x80;
2764
                *out++ = (c & 0x3F) | 0x80;
2765
            }
2766
            ++in;
2767
        }
2768
        if (instop - in > outend - out) instop = in + (outend - out);
2769
        while ((*in < 0x80) && (in < instop)) {
2770
            *out++ = *in++;
2771
        }
2772
    }
2773
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
2774
        *out++ =  *in++;
2775
    }
2776
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
2777
        *out++ =  *in++;
2778
    }
2779
    *outlen = out - outstart;
2780
    *inlen = in - instart;
2781
    return (*outlen);
2782
}
2783
2784
2785
/************************************************************************
2786
 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
2787
 ************************************************************************/
2788
2789
/*
 * ISO-8859-2 to Unicode: 128 code points, one per byte value 0x80..0xFF
 * (entry i belongs to byte 0x80 + i).
 * NOTE(review): presumably passed as @unicodetable to ISO8859xToUTF8,
 * which treats a 0x0000 entry as an undefined byte -- confirm at the
 * call site.
 */
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
2790
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2791
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2792
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2793
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2794
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
2795
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
2796
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
2797
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
2798
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
2799
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
2800
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
2801
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
2802
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
2803
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
2804
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
2805
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
2806
};
2807
2808
/*
 * Unicode to ISO-8859-2 lookup data: 48 leading bytes followed by
 * 6 groups of 64 bytes. The string fills the array exactly, so no
 * terminating NUL is stored.
 * NOTE(review): presumably passed as @xlattable to UTF8ToISO8859x, which
 * reads the first 48 bytes as block selectors and treats a 0 result as
 * "character not in the set" -- confirm at the call site.
 */
static const unsigned char xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
2809
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
2810
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2811
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2812
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2813
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2814
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2815
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2816
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2817
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2818
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
2819
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
2820
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
2821
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
2822
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2823
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
2824
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
2825
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
2826
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2827
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2828
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
2829
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
2830
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
2831
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
2832
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
2833
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
2834
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
2835
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
2836
};
2837
2838
/*
 * ISO-8859-3 to Unicode: 128 code points, one per byte value 0x80..0xFF
 * (entry i belongs to byte 0x80 + i). Several entries are 0x0000.
 * NOTE(review): presumably passed as @unicodetable to ISO8859xToUTF8,
 * which treats a 0x0000 entry as an undefined byte -- confirm at the
 * call site.
 */
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
2839
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2840
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2841
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2842
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2843
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
2844
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
2845
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
2846
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
2847
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
2848
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
2849
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
2850
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
2851
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
2852
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
2853
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
2854
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
2855
};
2856
2857
/*
 * Unicode to ISO-8859-3 lookup data: 48 leading bytes followed by
 * 7 groups of 64 bytes. The string fills the array exactly, so no
 * terminating NUL is stored.
 * NOTE(review): presumably passed as @xlattable to UTF8ToISO8859x, which
 * reads the first 48 bytes as block selectors and treats a 0 result as
 * "character not in the set" -- confirm at the call site.
 */
static const unsigned char xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
2858
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
2859
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2860
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2861
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2862
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2863
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2864
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2865
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2866
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2867
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
2868
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
2869
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
2870
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
2871
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
2872
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2873
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2874
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
2875
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2876
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2877
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2878
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2879
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2880
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2881
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2882
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
2883
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
2884
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
2885
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2886
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
2887
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2888
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
2889
};
2890
2891
/*
 * ISO-8859-4 to Unicode: 128 code points, one per byte value 0x80..0xFF
 * (entry i belongs to byte 0x80 + i).
 * NOTE(review): presumably passed as @unicodetable to ISO8859xToUTF8,
 * which treats a 0x0000 entry as an undefined byte -- confirm at the
 * call site.
 */
static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
2892
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2893
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2894
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2895
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2896
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
2897
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
2898
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
2899
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
2900
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
2901
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
2902
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
2903
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
2904
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
2905
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
2906
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
2907
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
2908
};
2909
2910
/*
 * Unicode to ISO-8859-4 lookup data: 48 leading bytes followed by
 * 6 groups of 64 bytes. The string fills the array exactly, so no
 * terminating NUL is stored.
 * NOTE(review): presumably passed as @xlattable to UTF8ToISO8859x, which
 * reads the first 48 bytes as block selectors and treats a 0 result as
 * "character not in the set" -- confirm at the call site.
 */
static const unsigned char xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
2911
    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
2912
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2913
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2914
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2915
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2916
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2917
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2918
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2919
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2920
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
2921
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
2922
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
2923
    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
2924
    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
2925
    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
2926
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
2927
    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
2928
    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
2929
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
2930
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
2931
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
2932
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2933
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2934
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
2935
    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
2936
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
2937
    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
2938
};
2939
2940
/*
 * ISO-8859-5 to Unicode: 128 code points, one per byte value 0x80..0xFF
 * (entry i belongs to byte 0x80 + i).
 * NOTE(review): presumably passed as @unicodetable to ISO8859xToUTF8,
 * which treats a 0x0000 entry as an undefined byte -- confirm at the
 * call site.
 */
static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
2941
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2942
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2943
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2944
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2945
    0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
2946
    0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
2947
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
2948
    0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
2949
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
2950
    0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
2951
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
2952
    0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
2953
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
2954
    0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
2955
    0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
2956
    0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
2957
};
2958
2959
/*
 * Unicode to ISO-8859-5 lookup data: 48 leading bytes followed by
 * 6 groups of 64 bytes. The string fills the array exactly, so no
 * terminating NUL is stored.
 * NOTE(review): presumably passed as @xlattable to UTF8ToISO8859x, which
 * reads the first 48 bytes as block selectors and treats a 0 result as
 * "character not in the set" -- confirm at the call site.
 */
static const unsigned char xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
2960
    "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2961
    "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2962
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2963
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2964
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2965
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2966
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2967
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2968
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2969
    "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
2970
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2971
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
2972
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
2973
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2974
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
2975
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2976
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
2977
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2978
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2979
    "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2980
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2981
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2982
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2983
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2984
    "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2985
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2986
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2987
};
2988
2989
/*
 * ISO-8859-6 to Unicode: 128 code points, one per byte value 0x80..0xFF
 * (entry i belongs to byte 0x80 + i). Many entries are 0x0000.
 * NOTE(review): presumably passed as @unicodetable to ISO8859xToUTF8,
 * which treats a 0x0000 entry as an undefined byte -- confirm at the
 * call site.
 */
static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
2990
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2991
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2992
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2993
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2994
    0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
2995
    0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
2996
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2997
    0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
2998
    0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
2999
    0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3000
    0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3001
    0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3002
    0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3003
    0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3004
    0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3005
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3006
};
3007
3008
/*
 * Unicode to ISO-8859-6 lookup data: 48 leading bytes followed by
 * 5 groups of 64 bytes. The string fills the array exactly, so no
 * terminating NUL is stored.
 * NOTE(review): presumably passed as @xlattable to UTF8ToISO8859x, which
 * reads the first 48 bytes as block selectors and treats a 0 result as
 * "character not in the set" -- confirm at the call site.
 */
static const unsigned char xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
3009
    "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3010
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3011
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3012
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3013
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3014
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3015
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3016
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3017
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3018
    "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3019
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3020
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3021
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3022
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3023
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3024
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3025
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3026
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3027
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3028
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3029
    "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3030
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3031
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3032
};
3033
3034
/*
 * ISO-8859-7 to Unicode: 128 code points, one per byte value 0x80..0xFF
 * (entry i belongs to byte 0x80 + i). Several entries are 0x0000.
 * NOTE(review): presumably passed as @unicodetable to ISO8859xToUTF8,
 * which treats a 0x0000 entry as an undefined byte -- confirm at the
 * call site.
 */
static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
3035
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3036
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3037
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3038
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3039
    0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3040
    0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3041
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3042
    0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3043
    0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3044
    0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3045
    0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3046
    0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3047
    0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3048
    0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3049
    0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3050
    0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3051
};
3052
3053
/*
 * Unicode to ISO-8859-7 lookup data: 48 leading bytes followed by
 * 7 groups of 64 bytes. The string fills the array exactly, so no
 * terminating NUL is stored.
 * NOTE(review): presumably passed as @xlattable to UTF8ToISO8859x, which
 * reads the first 48 bytes as block selectors and treats a 0 result as
 * "character not in the set" -- confirm at the call site.
 */
static const unsigned char xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3054
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3055
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3056
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3057
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3058
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3059
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3060
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3061
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3062
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3063
    "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3064
    "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3065
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3066
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3067
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3068
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3069
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3070
    "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3071
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3072
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3073
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3074
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3075
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3076
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3077
    "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3078
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3079
    "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3080
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3081
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3082
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3083
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3084
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3085
};
3086
3087
/*
 * ISO-8859-8 to Unicode: 128 code points, one per byte value 0x80..0xFF
 * (entry i belongs to byte 0x80 + i). Many entries are 0x0000.
 * NOTE(review): presumably passed as @unicodetable to ISO8859xToUTF8,
 * which treats a 0x0000 entry as an undefined byte -- confirm at the
 * call site.
 */
static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
3088
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3089
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3090
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3091
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3092
    0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3093
    0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3094
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3095
    0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3096
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3097
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3098
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3099
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3100
    0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3101
    0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3102
    0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3103
    0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3104
};
3105
3106
/*
 * Unicode to ISO-8859-8 lookup data: 48 leading bytes followed by
 * 7 groups of 64 bytes. The string fills the array exactly, so no
 * terminating NUL is stored.
 * NOTE(review): presumably passed as @xlattable to UTF8ToISO8859x, which
 * reads the first 48 bytes as block selectors and treats a 0 result as
 * "character not in the set" -- confirm at the call site.
 */
static const unsigned char xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3107
    "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3108
    "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3109
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3110
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3111
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3112
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3113
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3114
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3115
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3116
    "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3117
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3118
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3119
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3120
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3121
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3122
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3123
    "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3124
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3125
    "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3126
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3127
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3128
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3129
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3130
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3131
    "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3132
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3133
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3134
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3135
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3136
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3137
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3138
};
3139
3140
/*
 * ISO-8859-9 to Unicode: 128 code points, one per byte value 0x80..0xFF
 * (entry i belongs to byte 0x80 + i).
 * NOTE(review): presumably passed as @unicodetable to ISO8859xToUTF8,
 * which treats a 0x0000 entry as an undefined byte -- confirm at the
 * call site.
 */
static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
3141
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3142
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3143
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3144
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3145
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3146
    0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3147
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3148
    0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3149
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3150
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3151
    0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3152
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3153
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3154
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3155
    0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3156
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3157
};
3158
3159
/*
 * xmltranscodetable_ISO8859_9:
 * Lookup data handed to UTF8ToISO8859x by UTF8ToISO8859_9: a 48-byte
 * header followed by 5 blocks of 64 bytes (see the array bound).
 * NOTE(review): presumably the header selects a 64-byte block from the
 * UTF-8 lead byte and the block is indexed by the low six bits of the
 * trailing byte, with 0 meaning "not representable" -- confirm in
 * UTF8ToISO8859x.
 * The initializer is exactly as long as the array, so no terminating NUL
 * is stored.
 */
static const unsigned char xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
    "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3184
3185
/*
 * xmlunicodetable_ISO8859_10:
 * 128 16-bit values handed to ISO8859xToUTF8 by ISO8859_10ToUTF8.
 * Presumably entry i is the Unicode code point for ISO-8859-10 byte
 * 0x80 + i (the first 32 entries are 0x0080..0x009f) -- confirm in
 * ISO8859xToUTF8.
 */
static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
    0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
    0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
    0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
    0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
    0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
};
3203
3204
/*
 * xmltranscodetable_ISO8859_10:
 * Lookup data handed to UTF8ToISO8859x by UTF8ToISO8859_10: a 48-byte
 * header followed by 7 blocks of 64 bytes (see the array bound).
 * NOTE(review): presumably the header selects a 64-byte block from the
 * UTF-8 lead byte and the block is indexed by the low six bits of the
 * trailing byte, with 0 meaning "not representable" -- confirm in
 * UTF8ToISO8859x.
 * The initializer is exactly as long as the array, so no terminating NUL
 * is stored.
 */
static const unsigned char xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
    "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
    "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
    "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
    "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
};
3237
3238
/*
 * xmlunicodetable_ISO8859_11:
 * 128 16-bit values handed to ISO8859xToUTF8 by ISO8859_11ToUTF8.
 * Presumably entry i is the Unicode code point for ISO-8859-11 byte
 * 0x80 + i (the first 32 entries are 0x0080..0x009f) -- confirm in
 * ISO8859xToUTF8.
 * NOTE(review): the eight 0x0000 entries look like placeholders for
 * bytes that have no assigned character -- confirm how the converter
 * treats them.
 */
static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
    0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
    0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
    0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
    0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
    0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
    0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
    0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
    0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
    0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
    0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
    0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
};
3256
3257
/*
 * xmltranscodetable_ISO8859_11:
 * Lookup data handed to UTF8ToISO8859x by UTF8ToISO8859_11: a 48-byte
 * header followed by 6 blocks of 64 bytes (see the array bound).
 * NOTE(review): presumably the header selects a 64-byte block from the
 * UTF-8 lead byte and the block is indexed by the low six bits of the
 * trailing byte, with 0 meaning "not representable" -- confirm in
 * UTF8ToISO8859x.
 * NOTE(review): header byte 0 is 0x04 and the block it would select
 * starts with 0xff; this looks like a generator artifact of the 0x0000
 * placeholders in xmlunicodetable_ISO8859_11 -- confirm whether it is
 * reachable.
 * The initializer is exactly as long as the array, so no terminating NUL
 * is stored.
 */
static const unsigned char xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3286
3287
/*
 * xmlunicodetable_ISO8859_13:
 * 128 16-bit values handed to ISO8859xToUTF8 by ISO8859_13ToUTF8.
 * Presumably entry i is the Unicode code point for ISO-8859-13 byte
 * 0x80 + i (the first 32 entries are 0x0080..0x009f) -- confirm in
 * ISO8859xToUTF8.
 */
static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
    0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
    0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
    0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
    0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
    0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
    0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
    0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
    0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
    0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
    0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
};
3305
3306
/*
 * xmltranscodetable_ISO8859_13:
 * Lookup data handed to UTF8ToISO8859x by UTF8ToISO8859_13: a 48-byte
 * header followed by 7 blocks of 64 bytes (see the array bound).
 * NOTE(review): presumably the header selects a 64-byte block from the
 * UTF-8 lead byte and the block is indexed by the low six bits of the
 * trailing byte, with 0 meaning "not representable" -- confirm in
 * UTF8ToISO8859x.
 * The initializer is exactly as long as the array, so no terminating NUL
 * is stored.
 */
static const unsigned char xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
    "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
    "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
    "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
    "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
    "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
    "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
};
3339
3340
/*
 * xmlunicodetable_ISO8859_14:
 * 128 16-bit values handed to ISO8859xToUTF8 by ISO8859_14ToUTF8.
 * Presumably entry i is the Unicode code point for ISO-8859-14 byte
 * 0x80 + i (the first 32 entries are 0x0080..0x009f) -- confirm in
 * ISO8859xToUTF8.
 */
static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
    0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
    0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
    0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
};
3358
3359
/*
 * xmltranscodetable_ISO8859_14:
 * Lookup data handed to UTF8ToISO8859x by UTF8ToISO8859_14: a 48-byte
 * header followed by 10 blocks of 64 bytes (see the array bound).
 * NOTE(review): presumably the header selects a 64-byte block from the
 * UTF-8 lead byte and the block is indexed by the low six bits of the
 * trailing byte, with 0 meaning "not representable" -- confirm in
 * UTF8ToISO8859x.
 * The initializer is exactly as long as the array, so no terminating NUL
 * is stored.
 */
static const unsigned char xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
    "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
    "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
    "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
    "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
};
3404
3405
/*
 * xmlunicodetable_ISO8859_15:
 * 128 16-bit values handed to ISO8859xToUTF8 by ISO8859_15ToUTF8.
 * Presumably entry i is the Unicode code point for ISO-8859-15 byte
 * 0x80 + i (the first 32 entries are 0x0080..0x009f) -- confirm in
 * ISO8859xToUTF8.
 */
static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
    0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
    0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
};
3423
3424
/*
 * xmltranscodetable_ISO8859_15:
 * Lookup data handed to UTF8ToISO8859x by UTF8ToISO8859_15: a 48-byte
 * header followed by 6 blocks of 64 bytes (see the array bound).
 * NOTE(review): presumably the header selects a 64-byte block from the
 * UTF-8 lead byte and the block is indexed by the low six bits of the
 * trailing byte, with 0 meaning "not representable" -- confirm in
 * UTF8ToISO8859x.
 * The initializer is exactly as long as the array, so no terminating NUL
 * is stored.
 */
static const unsigned char xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
};
3453
3454
/*
 * xmlunicodetable_ISO8859_16:
 * 128 16-bit values handed to ISO8859xToUTF8 by ISO8859_16ToUTF8.
 * Presumably entry i is the Unicode code point for ISO-8859-16 byte
 * 0x80 + i (the first 32 entries are 0x0080..0x009f) -- confirm in
 * ISO8859xToUTF8.
 */
static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
};
3472
3473
/*
 * xmltranscodetable_ISO8859_16:
 * Lookup data handed to UTF8ToISO8859x by UTF8ToISO8859_16: a 48-byte
 * header followed by 9 blocks of 64 bytes (see the array bound).
 * NOTE(review): presumably the header selects a 64-byte block from the
 * UTF-8 lead byte and the block is indexed by the low six bits of the
 * trailing byte, with 0 meaning "not representable" -- confirm in
 * UTF8ToISO8859x.
 * The initializer is exactly as long as the array, so no terminating NUL
 * is stored.
 */
static const unsigned char xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
};
3514
3515
3516
/*
 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
 */
3519
3520
static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3521
    const unsigned char* in, int *inlen) {
3522
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3523
}
3524
static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3525
    const unsigned char* in, int *inlen) {
3526
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3527
}
3528
3529
static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3530
    const unsigned char* in, int *inlen) {
3531
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3532
}
3533
static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3534
    const unsigned char* in, int *inlen) {
3535
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3536
}
3537
3538
static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3539
    const unsigned char* in, int *inlen) {
3540
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3541
}
3542
static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3543
    const unsigned char* in, int *inlen) {
3544
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3545
}
3546
3547
static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3548
    const unsigned char* in, int *inlen) {
3549
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3550
}
3551
static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3552
    const unsigned char* in, int *inlen) {
3553
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3554
}
3555
3556
static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3557
    const unsigned char* in, int *inlen) {
3558
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3559
}
3560
static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3561
    const unsigned char* in, int *inlen) {
3562
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3563
}
3564
3565
static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3566
    const unsigned char* in, int *inlen) {
3567
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3568
}
3569
static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3570
    const unsigned char* in, int *inlen) {
3571
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3572
}
3573
3574
static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3575
    const unsigned char* in, int *inlen) {
3576
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3577
}
3578
static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3579
    const unsigned char* in, int *inlen) {
3580
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3581
}
3582
3583
static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3584
    const unsigned char* in, int *inlen) {
3585
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3586
}
3587
static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3588
    const unsigned char* in, int *inlen) {
3589
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3590
}
3591
3592
static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3593
    const unsigned char* in, int *inlen) {
3594
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3595
}
3596
static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3597
    const unsigned char* in, int *inlen) {
3598
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3599
}
3600
3601
static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3602
    const unsigned char* in, int *inlen) {
3603
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3604
}
3605
static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3606
    const unsigned char* in, int *inlen) {
3607
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3608
}
3609
3610
static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3611
    const unsigned char* in, int *inlen) {
3612
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3613
}
3614
static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3615
    const unsigned char* in, int *inlen) {
3616
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3617
}
3618
3619
static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3620
    const unsigned char* in, int *inlen) {
3621
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3622
}
3623
static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3624
    const unsigned char* in, int *inlen) {
3625
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3626
}
3627
3628
static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3629
    const unsigned char* in, int *inlen) {
3630
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3631
}
3632
static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3633
    const unsigned char* in, int *inlen) {
3634
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3635
}
3636
3637
static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3638
    const unsigned char* in, int *inlen) {
3639
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3640
}
3641
static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3642
    const unsigned char* in, int *inlen) {
3643
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3644
}
3645
3646
#endif
3647
#endif
3648