Coverage Report

Created: 2023-06-07 06:14

/src/libxml2/encoding.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * encoding.c : implements the encoding conversion functions needed for XML
3
 *
4
 * Related specs:
5
 * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6
 * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7
 * [ISO-10646]    UTF-8 and UTF-16 in Annexes
8
 * [ISO-8859-1]   ISO Latin-1 characters codes.
9
 * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
10
 *                Worldwide Character Encoding -- Version 1.0", Addison-
11
 *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
12
 *                described in Unicode Technical Report #4.
13
 * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
14
 *                Information Interchange, ANSI X3.4-1986.
15
 *
16
 * See Copyright for the status of this software.
17
 *
18
 * daniel@veillard.com
19
 *
20
 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
21
 */
22
23
#define IN_LIBXML
24
#include "libxml.h"
25
26
#include <string.h>
27
#include <limits.h>
28
#include <ctype.h>
29
#include <stdlib.h>
30
31
#ifdef LIBXML_ICONV_ENABLED
32
#include <errno.h>
33
#endif
34
35
#include <libxml/encoding.h>
36
#include <libxml/xmlmemory.h>
37
#ifdef LIBXML_HTML_ENABLED
38
#include <libxml/HTMLparser.h>
39
#endif
40
#include <libxml/globals.h>
41
#include <libxml/xmlerror.h>
42
43
#include "private/buf.h"
44
#include "private/enc.h"
45
#include "private/error.h"
46
47
#ifdef LIBXML_ICU_ENABLED
48
#include <unicode/ucnv.h>
49
/* Size of pivot buffer, same as icu/source/common/ucnv.cpp CHUNK_SIZE */
50
#define ICU_PIVOT_BUF_SIZE 1024
51
/*
 * State bundle for one ICU-backed conversion: two ICU converters plus a
 * fixed-size UTF-16 pivot buffer of ICU_PIVOT_BUF_SIZE code units.
 * Instances are created by openIcuConverter() and released by
 * closeIcuConverter().
 */
typedef struct _uconv_t uconv_t;
52
struct _uconv_t {
53
  UConverter *uconv; /* for conversion between an encoding and UTF-16 */
54
  UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */
55
  UChar      pivot_buf[ICU_PIVOT_BUF_SIZE]; /* UTF-16 scratch storage */
56
  UChar      *pivot_source; /* set to pivot_buf on open; presumably the read cursor into pivot_buf -- TODO confirm at the conversion call sites */
57
  UChar      *pivot_target; /* set to pivot_buf on open; presumably the write cursor into pivot_buf -- TODO confirm at the conversion call sites */
58
};
59
#endif
60
61
/*
 * One entry of the encoding alias table: a pair of C strings.
 * NOTE(review): presumably @alias is the alternative spelling and @name
 * the encoding name it resolves to -- confirm against the alias
 * add/lookup routines.
 */
typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
62
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
63
struct _xmlCharEncodingAlias {
64
    const char *name;
65
    const char *alias;
66
};
67
68
/*
 * File-local alias table state, all starting out empty (NULL / 0).
 * NOTE(review): judging by the names, ...Nb is the number of entries in
 * use and ...Max the allocated capacity -- confirm where the table is
 * grown.
 */
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
69
static int xmlCharEncodingAliasesNb = 0;
70
static int xmlCharEncodingAliasesMax = 0;
71
72
#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
73
#if 0
74
#define DEBUG_ENCODING  /* Define this to get encoding traces */
75
#endif
76
#else
77
#endif
78
79
/*
 * Host byte-order flag: non-zero means the machine is treated as
 * little-endian. Starts out as 1.
 * NOTE(review): confirm where (if anywhere) it is re-detected at runtime.
 */
static int xmlLittleEndian = 1;
80
81
#ifdef LIBXML_ICU_ENABLED
82
static uconv_t*
83
openIcuConverter(const char* name, int toUnicode)
84
{
85
  UErrorCode status = U_ZERO_ERROR;
86
  uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
87
  if (conv == NULL)
88
    return NULL;
89
90
  conv->pivot_source = conv->pivot_buf;
91
  conv->pivot_target = conv->pivot_buf;
92
93
  conv->uconv = ucnv_open(name, &status);
94
  if (U_FAILURE(status))
95
    goto error;
96
97
  status = U_ZERO_ERROR;
98
  if (toUnicode) {
99
    ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
100
                        NULL, NULL, NULL, &status);
101
  }
102
  else {
103
    ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
104
                        NULL, NULL, NULL, &status);
105
  }
106
  if (U_FAILURE(status))
107
    goto error;
108
109
  status = U_ZERO_ERROR;
110
  conv->utf8 = ucnv_open("UTF-8", &status);
111
  if (U_SUCCESS(status))
112
    return conv;
113
114
error:
115
  if (conv->uconv)
116
    ucnv_close(conv->uconv);
117
  xmlFree(conv);
118
  return NULL;
119
}
120
121
static void
122
closeIcuConverter(uconv_t *conv)
123
{
124
  if (conv != NULL) {
125
    ucnv_close(conv->uconv);
126
    ucnv_close(conv->utf8);
127
    xmlFree(conv);
128
  }
129
}
130
#endif /* LIBXML_ICU_ENABLED */
131
132
/************************************************************************
133
 *                  *
134
 *    Conversions To/From UTF8 encoding     *
135
 *                  *
136
 ************************************************************************/
137
138
/**
139
 * asciiToUTF8:
140
 * @out:  a pointer to an array of bytes to store the result
141
 * @outlen:  the length of @out
142
 * @in:  a pointer to an array of ASCII chars
143
 * @inlen:  the length of @in
144
 *
145
 * Take a block of ASCII chars in and try to convert it to an UTF-8
146
 * block of chars out.
147
 *
148
 * Returns the number of bytes written or an XML_ENC_ERR code.
149
 *
150
 * The value of @inlen after return is the number of octets consumed
151
 *     if the return value is positive, else unpredictable.
152
 * The value of @outlen after return is the number of octets produced.
153
 */
154
static int
155
asciiToUTF8(unsigned char* out, int *outlen,
156
2
              const unsigned char* in, int *inlen) {
157
2
    unsigned char* outstart = out;
158
2
    const unsigned char* base = in;
159
2
    const unsigned char* processed = in;
160
2
    unsigned char* outend = out + *outlen;
161
2
    const unsigned char* inend;
162
2
    unsigned int c;
163
164
2
    inend = in + (*inlen);
165
1.41M
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
166
1.41M
  c= *in++;
167
168
1.41M
        if (out >= outend)
169
0
      break;
170
1.41M
        if (c < 0x80) {
171
1.41M
      *out++ = c;
172
1.41M
  } else {
173
2
      *outlen = out - outstart;
174
2
      *inlen = processed - base;
175
2
      return(XML_ENC_ERR_INPUT);
176
2
  }
177
178
1.41M
  processed = (const unsigned char*) in;
179
1.41M
    }
180
0
    *outlen = out - outstart;
181
0
    *inlen = processed - base;
182
0
    return(*outlen);
183
2
}
184
185
#ifdef LIBXML_OUTPUT_ENABLED
186
/**
187
 * UTF8Toascii:
188
 * @out:  a pointer to an array of bytes to store the result
189
 * @outlen:  the length of @out
190
 * @in:  a pointer to an array of UTF-8 chars
191
 * @inlen:  the length of @in
192
 *
193
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
194
 * block of chars out.
195
 *
196
 * Returns the number of bytes written or an XML_ENC_ERR code.
197
 *
198
 * The value of @inlen after return is the number of octets consumed
199
 *     if the return value is positive, else unpredictable.
200
 * The value of @outlen after return is the number of octets produced.
201
 */
202
static int
203
UTF8Toascii(unsigned char* out, int *outlen,
204
0
              const unsigned char* in, int *inlen) {
205
0
    const unsigned char* processed = in;
206
0
    const unsigned char* outend;
207
0
    const unsigned char* outstart = out;
208
0
    const unsigned char* instart = in;
209
0
    const unsigned char* inend;
210
0
    unsigned int c, d;
211
0
    int trailing;
212
213
0
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
214
0
        return(XML_ENC_ERR_INTERNAL);
215
0
    if (in == NULL) {
216
        /*
217
   * initialization nothing to do
218
   */
219
0
  *outlen = 0;
220
0
  *inlen = 0;
221
0
  return(0);
222
0
    }
223
0
    inend = in + (*inlen);
224
0
    outend = out + (*outlen);
225
0
    while (in < inend) {
226
0
  d = *in++;
227
0
  if      (d < 0x80)  { c= d; trailing= 0; }
228
0
  else if (d < 0xC0) {
229
      /* trailing byte in leading position */
230
0
      *outlen = out - outstart;
231
0
      *inlen = processed - instart;
232
0
      return(XML_ENC_ERR_INPUT);
233
0
        } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
234
0
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
235
0
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
236
0
  else {
237
      /* no chance for this in Ascii */
238
0
      *outlen = out - outstart;
239
0
      *inlen = processed - instart;
240
0
      return(XML_ENC_ERR_INPUT);
241
0
  }
242
243
0
  if (inend - in < trailing) {
244
0
      break;
245
0
  }
246
247
0
  for ( ; trailing; trailing--) {
248
0
      if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
249
0
    break;
250
0
      c <<= 6;
251
0
      c |= d & 0x3F;
252
0
  }
253
254
  /* assertion: c is a single UTF-4 value */
255
0
  if (c < 0x80) {
256
0
      if (out >= outend)
257
0
    break;
258
0
      *out++ = c;
259
0
  } else {
260
      /* no chance for this in Ascii */
261
0
      *outlen = out - outstart;
262
0
      *inlen = processed - instart;
263
0
      return(XML_ENC_ERR_INPUT);
264
0
  }
265
0
  processed = in;
266
0
    }
267
0
    *outlen = out - outstart;
268
0
    *inlen = processed - instart;
269
0
    return(*outlen);
270
0
}
271
#endif /* LIBXML_OUTPUT_ENABLED */
272
273
/**
274
 * isolat1ToUTF8:
275
 * @out:  a pointer to an array of bytes to store the result
276
 * @outlen:  the length of @out
277
 * @in:  a pointer to an array of ISO Latin 1 chars
278
 * @inlen:  the length of @in
279
 *
280
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
281
 * block of chars out.
282
 *
283
 * Returns the number of bytes written or an XML_ENC_ERR code.
284
 *
285
 * The value of @inlen after return is the number of octets consumed
286
 *     if the return value is positive, else unpredictable.
287
 * The value of @outlen after return is the number of octets produced.
288
 */
289
int
290
isolat1ToUTF8(unsigned char* out, int *outlen,
291
37
              const unsigned char* in, int *inlen) {
292
37
    unsigned char* outstart = out;
293
37
    const unsigned char* base = in;
294
37
    unsigned char* outend;
295
37
    const unsigned char* inend;
296
37
    const unsigned char* instop;
297
298
37
    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
299
0
  return(XML_ENC_ERR_INTERNAL);
300
301
37
    outend = out + *outlen;
302
37
    inend = in + (*inlen);
303
37
    instop = inend;
304
305
8.72M
    while ((in < inend) && (out < outend - 1)) {
306
8.72M
  if (*in >= 0x80) {
307
8.72M
      *out++ = (((*in) >>  6) & 0x1F) | 0xC0;
308
8.72M
            *out++ = ((*in) & 0x3F) | 0x80;
309
8.72M
      ++in;
310
8.72M
  }
311
8.72M
  if ((instop - in) > (outend - out)) instop = in + (outend - out);
312
9.92M
  while ((in < instop) && (*in < 0x80)) {
313
1.20M
      *out++ = *in++;
314
1.20M
  }
315
8.72M
    }
316
37
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
317
0
        *out++ = *in++;
318
0
    }
319
37
    *outlen = out - outstart;
320
37
    *inlen = in - base;
321
37
    return(*outlen);
322
37
}
323
324
/**
325
 * UTF8ToUTF8:
326
 * @out:  a pointer to an array of bytes to store the result
327
 * @outlen:  the length of @out
328
 * @inb:  a pointer to an array of UTF-8 chars
329
 * @inlenb:  the length of @in in UTF-8 chars
330
 *
331
 * No op copy operation for UTF8 handling.
332
 *
333
 * Returns the number of bytes written or an XML_ENC_ERR code.
334
 *
335
 *     The value of *inlen after return is the number of octets consumed
336
 *     if the return value is positive, else unpredictable.
337
 */
338
static int
339
UTF8ToUTF8(unsigned char* out, int *outlen,
340
           const unsigned char* inb, int *inlenb)
341
0
{
342
0
    int len;
343
344
0
    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
345
0
  return(XML_ENC_ERR_INTERNAL);
346
0
    if (inb == NULL) {
347
        /* inb == NULL means output is initialized. */
348
0
        *outlen = 0;
349
0
        *inlenb = 0;
350
0
        return(0);
351
0
    }
352
0
    if (*outlen > *inlenb) {
353
0
  len = *inlenb;
354
0
    } else {
355
0
  len = *outlen;
356
0
    }
357
0
    if (len < 0)
358
0
  return(XML_ENC_ERR_INTERNAL);
359
360
    /*
361
     * FIXME: Conversion functions must assure valid UTF-8, so we have
362
     * to check for UTF-8 validity. Preferably, this converter shouldn't
363
     * be used at all.
364
     */
365
0
    memcpy(out, inb, len);
366
367
0
    *outlen = len;
368
0
    *inlenb = len;
369
0
    return(*outlen);
370
0
}
371
372
373
#ifdef LIBXML_OUTPUT_ENABLED
374
/**
375
 * UTF8Toisolat1:
376
 * @out:  a pointer to an array of bytes to store the result
377
 * @outlen:  the length of @out
378
 * @in:  a pointer to an array of UTF-8 chars
379
 * @inlen:  the length of @in
380
 *
381
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
382
 * block of chars out.
383
 *
384
 * Returns the number of bytes written or an XML_ENC_ERR code.
385
 *
386
 * The value of @inlen after return is the number of octets consumed
387
 *     if the return value is positive, else unpredictable.
388
 * The value of @outlen after return is the number of octets produced.
389
 */
390
int
391
UTF8Toisolat1(unsigned char* out, int *outlen,
392
0
              const unsigned char* in, int *inlen) {
393
0
    const unsigned char* processed = in;
394
0
    const unsigned char* outend;
395
0
    const unsigned char* outstart = out;
396
0
    const unsigned char* instart = in;
397
0
    const unsigned char* inend;
398
0
    unsigned int c, d;
399
0
    int trailing;
400
401
0
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
402
0
        return(XML_ENC_ERR_INTERNAL);
403
0
    if (in == NULL) {
404
        /*
405
   * initialization nothing to do
406
   */
407
0
  *outlen = 0;
408
0
  *inlen = 0;
409
0
  return(0);
410
0
    }
411
0
    inend = in + (*inlen);
412
0
    outend = out + (*outlen);
413
0
    while (in < inend) {
414
0
  d = *in++;
415
0
  if      (d < 0x80)  { c= d; trailing= 0; }
416
0
  else if (d < 0xC0) {
417
      /* trailing byte in leading position */
418
0
      *outlen = out - outstart;
419
0
      *inlen = processed - instart;
420
0
      return(XML_ENC_ERR_INPUT);
421
0
        } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
422
0
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
423
0
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
424
0
  else {
425
      /* no chance for this in IsoLat1 */
426
0
      *outlen = out - outstart;
427
0
      *inlen = processed - instart;
428
0
      return(XML_ENC_ERR_INPUT);
429
0
  }
430
431
0
  if (inend - in < trailing) {
432
0
      break;
433
0
  }
434
435
0
  for ( ; trailing; trailing--) {
436
0
      if (in >= inend)
437
0
    break;
438
0
      if (((d= *in++) & 0xC0) != 0x80) {
439
0
    *outlen = out - outstart;
440
0
    *inlen = processed - instart;
441
0
    return(XML_ENC_ERR_INPUT);
442
0
      }
443
0
      c <<= 6;
444
0
      c |= d & 0x3F;
445
0
  }
446
447
  /* assertion: c is a single UTF-4 value */
448
0
  if (c <= 0xFF) {
449
0
      if (out >= outend)
450
0
    break;
451
0
      *out++ = c;
452
0
  } else {
453
      /* no chance for this in IsoLat1 */
454
0
      *outlen = out - outstart;
455
0
      *inlen = processed - instart;
456
0
      return(XML_ENC_ERR_INPUT);
457
0
  }
458
0
  processed = in;
459
0
    }
460
0
    *outlen = out - outstart;
461
0
    *inlen = processed - instart;
462
0
    return(*outlen);
463
0
}
464
#endif /* LIBXML_OUTPUT_ENABLED */
465
466
/**
467
 * UTF16LEToUTF8:
468
 * @out:  a pointer to an array of bytes to store the result
469
 * @outlen:  the length of @out
470
 * @inb:  a pointer to an array of UTF-16LE passwd as a byte array
471
 * @inlenb:  the length of @in in UTF-16LE chars
472
 *
473
 * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8
474
 * block of chars out. This function assumes the endian property
475
 * is the same between the native type of this machine and the
476
 * inputed one.
477
 *
478
 * Returns the number of bytes written or an XML_ENC_ERR code.
479
 *
480
 * The value of *inlen after return is the number of octets consumed
481
 * if the return value is positive, else unpredictable.
482
 */
483
static int
484
UTF16LEToUTF8(unsigned char* out, int *outlen,
485
            const unsigned char* inb, int *inlenb)
486
1.06k
{
487
1.06k
    unsigned char* outstart = out;
488
1.06k
    const unsigned char* processed = inb;
489
1.06k
    unsigned char* outend;
490
1.06k
    unsigned short* in = (unsigned short*) inb;
491
1.06k
    unsigned short* inend;
492
1.06k
    unsigned int c, d, inlen;
493
1.06k
    unsigned char *tmp;
494
1.06k
    int bits;
495
496
1.06k
    if (*outlen == 0) {
497
0
        *inlenb = 0;
498
0
        return(0);
499
0
    }
500
1.06k
    outend = out + *outlen;
501
1.06k
    if ((*inlenb % 2) == 1)
502
1.05k
        (*inlenb)--;
503
1.06k
    inlen = *inlenb / 2;
504
1.06k
    inend = in + inlen;
505
2.59M
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
506
2.59M
        if (xmlLittleEndian) {
507
2.59M
      c= *in++;
508
2.59M
  } else {
509
0
      tmp = (unsigned char *) in;
510
0
      c = *tmp++;
511
0
      c = c | (*tmp << 8);
512
0
      in++;
513
0
  }
514
2.59M
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
515
4
      if (in >= inend) {           /* handle split mutli-byte characters */
516
0
    break;
517
0
      }
518
4
      if (xmlLittleEndian) {
519
4
    d = *in++;
520
4
      } else {
521
0
    tmp = (unsigned char *) in;
522
0
    d = *tmp++;
523
0
    d = d | (*tmp << 8);
524
0
    in++;
525
0
      }
526
4
            if ((d & 0xFC00) == 0xDC00) {
527
0
                c &= 0x03FF;
528
0
                c <<= 10;
529
0
                c |= d & 0x03FF;
530
0
                c += 0x10000;
531
0
            }
532
4
            else {
533
4
    *outlen = out - outstart;
534
4
    *inlenb = processed - inb;
535
4
          return(XML_ENC_ERR_INPUT);
536
4
      }
537
4
        }
538
539
  /* assertion: c is a single UTF-4 value */
540
2.59M
        if (out >= outend)
541
0
      break;
542
2.59M
        if      (c <    0x80) {  *out++=  c;                bits= -6; }
543
2.58M
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
544
2.58M
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
545
0
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
546
547
7.75M
        for ( ; bits >= 0; bits-= 6) {
548
5.16M
            if (out >= outend)
549
0
          break;
550
5.16M
            *out++= ((c >> bits) & 0x3F) | 0x80;
551
5.16M
        }
552
2.59M
  processed = (const unsigned char*) in;
553
2.59M
    }
554
1.06k
    *outlen = out - outstart;
555
1.06k
    *inlenb = processed - inb;
556
1.06k
    return(*outlen);
557
1.06k
}
558
559
#ifdef LIBXML_OUTPUT_ENABLED
560
/**
561
 * UTF8ToUTF16LE:
562
 * @outb:  a pointer to an array of bytes to store the result
563
 * @outlen:  the length of @outb
564
 * @in:  a pointer to an array of UTF-8 chars
565
 * @inlen:  the length of @in
566
 *
567
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
568
 * block of chars out.
569
 *
570
 * Returns the number of bytes written or an XML_ENC_ERR code.
571
 */
572
static int
573
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
574
            const unsigned char* in, int *inlen)
575
0
{
576
0
    unsigned short* out = (unsigned short*) outb;
577
0
    const unsigned char* processed = in;
578
0
    const unsigned char *const instart = in;
579
0
    unsigned short* outstart= out;
580
0
    unsigned short* outend;
581
0
    const unsigned char* inend;
582
0
    unsigned int c, d;
583
0
    int trailing;
584
0
    unsigned char *tmp;
585
0
    unsigned short tmp1, tmp2;
586
587
    /* UTF16LE encoding has no BOM */
588
0
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
589
0
        return(XML_ENC_ERR_INTERNAL);
590
0
    if (in == NULL) {
591
0
  *outlen = 0;
592
0
  *inlen = 0;
593
0
  return(0);
594
0
    }
595
0
    inend= in + *inlen;
596
0
    outend = out + (*outlen / 2);
597
0
    while (in < inend) {
598
0
      d= *in++;
599
0
      if      (d < 0x80)  { c= d; trailing= 0; }
600
0
      else if (d < 0xC0) {
601
          /* trailing byte in leading position */
602
0
    *outlen = (out - outstart) * 2;
603
0
    *inlen = processed - instart;
604
0
    return(XML_ENC_ERR_INPUT);
605
0
      } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
606
0
      else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
607
0
      else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
608
0
      else {
609
  /* no chance for this in UTF-16 */
610
0
  *outlen = (out - outstart) * 2;
611
0
  *inlen = processed - instart;
612
0
  return(XML_ENC_ERR_INPUT);
613
0
      }
614
615
0
      if (inend - in < trailing) {
616
0
          break;
617
0
      }
618
619
0
      for ( ; trailing; trailing--) {
620
0
          if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
621
0
        break;
622
0
          c <<= 6;
623
0
          c |= d & 0x3F;
624
0
      }
625
626
      /* assertion: c is a single UTF-4 value */
627
0
        if (c < 0x10000) {
628
0
            if (out >= outend)
629
0
          break;
630
0
      if (xmlLittleEndian) {
631
0
    *out++ = c;
632
0
      } else {
633
0
    tmp = (unsigned char *) out;
634
0
    *tmp = (unsigned char) c; /* Explicit truncation */
635
0
    *(tmp + 1) = c >> 8 ;
636
0
    out++;
637
0
      }
638
0
        }
639
0
        else if (c < 0x110000) {
640
0
            if (out+1 >= outend)
641
0
          break;
642
0
            c -= 0x10000;
643
0
      if (xmlLittleEndian) {
644
0
    *out++ = 0xD800 | (c >> 10);
645
0
    *out++ = 0xDC00 | (c & 0x03FF);
646
0
      } else {
647
0
    tmp1 = 0xD800 | (c >> 10);
648
0
    tmp = (unsigned char *) out;
649
0
    *tmp = (unsigned char) tmp1; /* Explicit truncation */
650
0
    *(tmp + 1) = tmp1 >> 8;
651
0
    out++;
652
653
0
    tmp2 = 0xDC00 | (c & 0x03FF);
654
0
    tmp = (unsigned char *) out;
655
0
    *tmp  = (unsigned char) tmp2; /* Explicit truncation */
656
0
    *(tmp + 1) = tmp2 >> 8;
657
0
    out++;
658
0
      }
659
0
        }
660
0
        else
661
0
      break;
662
0
  processed = in;
663
0
    }
664
0
    *outlen = (out - outstart) * 2;
665
0
    *inlen = processed - instart;
666
0
    return(*outlen);
667
0
}
668
669
/**
670
 * UTF8ToUTF16:
671
 * @outb:  a pointer to an array of bytes to store the result
672
 * @outlen:  the length of @outb
673
 * @in:  a pointer to an array of UTF-8 chars
674
 * @inlen:  the length of @in
675
 *
676
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
677
 * block of chars out.
678
 *
679
 * Returns the number of bytes written or an XML_ENC_ERR code.
680
 */
681
static int
682
UTF8ToUTF16(unsigned char* outb, int *outlen,
683
            const unsigned char* in, int *inlen)
684
0
{
685
0
    if (in == NULL) {
686
  /*
687
   * initialization, add the Byte Order Mark for UTF-16LE
688
   */
689
0
        if (*outlen >= 2) {
690
0
      outb[0] = 0xFF;
691
0
      outb[1] = 0xFE;
692
0
      *outlen = 2;
693
0
      *inlen = 0;
694
#ifdef DEBUG_ENCODING
695
            xmlGenericError(xmlGenericErrorContext,
696
        "Added FFFE Byte Order Mark\n");
697
#endif
698
0
      return(2);
699
0
  }
700
0
  *outlen = 0;
701
0
  *inlen = 0;
702
0
  return(0);
703
0
    }
704
0
    return (UTF8ToUTF16LE(outb, outlen, in, inlen));
705
0
}
706
#endif /* LIBXML_OUTPUT_ENABLED */
707
708
/**
709
 * UTF16BEToUTF8:
710
 * @out:  a pointer to an array of bytes to store the result
711
 * @outlen:  the length of @out
712
 * @inb:  a pointer to an array of UTF-16 passed as a byte array
713
 * @inlenb:  the length of @in in UTF-16 chars
714
 *
715
 * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
716
 * block of chars out. This function assumes the endian property
717
 * is the same between the native type of this machine and the
718
 * inputed one.
719
 *
720
 * Returns the number of bytes written or an XML_ENC_ERR code.
721
 *
722
 * The value of *inlen after return is the number of octets consumed
723
 * if the return value is positive, else unpredictable.
724
 */
725
static int
726
UTF16BEToUTF8(unsigned char* out, int *outlen,
727
            const unsigned char* inb, int *inlenb)
728
1.14k
{
729
1.14k
    unsigned char* outstart = out;
730
1.14k
    const unsigned char* processed = inb;
731
1.14k
    unsigned char* outend;
732
1.14k
    unsigned short* in = (unsigned short*) inb;
733
1.14k
    unsigned short* inend;
734
1.14k
    unsigned int c, d, inlen;
735
1.14k
    unsigned char *tmp;
736
1.14k
    int bits;
737
738
1.14k
    if (*outlen == 0) {
739
0
        *inlenb = 0;
740
0
        return(0);
741
0
    }
742
1.14k
    outend = out + *outlen;
743
1.14k
    if ((*inlenb % 2) == 1)
744
1.13k
        (*inlenb)--;
745
1.14k
    inlen = *inlenb / 2;
746
1.14k
    inend= in + inlen;
747
4.00M
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
748
4.00M
  if (xmlLittleEndian) {
749
4.00M
      tmp = (unsigned char *) in;
750
4.00M
      c = *tmp++;
751
4.00M
      c = (c << 8) | *tmp;
752
4.00M
      in++;
753
4.00M
  } else {
754
0
      c= *in++;
755
0
  }
756
4.00M
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
757
0
      if (in >= inend) {           /* handle split mutli-byte characters */
758
0
                break;
759
0
      }
760
0
      if (xmlLittleEndian) {
761
0
    tmp = (unsigned char *) in;
762
0
    d = *tmp++;
763
0
    d = (d << 8) | *tmp;
764
0
    in++;
765
0
      } else {
766
0
    d= *in++;
767
0
      }
768
0
            if ((d & 0xFC00) == 0xDC00) {
769
0
                c &= 0x03FF;
770
0
                c <<= 10;
771
0
                c |= d & 0x03FF;
772
0
                c += 0x10000;
773
0
            }
774
0
            else {
775
0
    *outlen = out - outstart;
776
0
    *inlenb = processed - inb;
777
0
          return(XML_ENC_ERR_INPUT);
778
0
      }
779
0
        }
780
781
  /* assertion: c is a single UTF-4 value */
782
4.00M
        if (out >= outend)
783
0
      break;
784
4.00M
        if      (c <    0x80) {  *out++=  c;                bits= -6; }
785
4.00M
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
786
4.00M
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
787
0
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
788
789
12.0M
        for ( ; bits >= 0; bits-= 6) {
790
8.00M
            if (out >= outend)
791
0
          break;
792
8.00M
            *out++= ((c >> bits) & 0x3F) | 0x80;
793
8.00M
        }
794
4.00M
  processed = (const unsigned char*) in;
795
4.00M
    }
796
1.14k
    *outlen = out - outstart;
797
1.14k
    *inlenb = processed - inb;
798
1.14k
    return(*outlen);
799
1.14k
}
800
801
#ifdef LIBXML_OUTPUT_ENABLED
802
/**
803
 * UTF8ToUTF16BE:
804
 * @outb:  a pointer to an array of bytes to store the result
805
 * @outlen:  the length of @outb
806
 * @in:  a pointer to an array of UTF-8 chars
807
 * @inlen:  the length of @in
808
 *
809
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
810
 * block of chars out.
811
 *
812
 * Returns the number of bytes written or an XML_ENC_ERR code.
813
 */
814
static int
815
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
816
            const unsigned char* in, int *inlen)
817
0
{
818
0
    unsigned short* out = (unsigned short*) outb;
819
0
    const unsigned char* processed = in;
820
0
    const unsigned char *const instart = in;
821
0
    unsigned short* outstart= out;
822
0
    unsigned short* outend;
823
0
    const unsigned char* inend;
824
0
    unsigned int c, d;
825
0
    int trailing;
826
0
    unsigned char *tmp;
827
0
    unsigned short tmp1, tmp2;
828
829
    /* UTF-16BE has no BOM */
830
0
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
831
0
        return(XML_ENC_ERR_INTERNAL);
832
0
    if (in == NULL) {
833
0
  *outlen = 0;
834
0
  *inlen = 0;
835
0
  return(0);
836
0
    }
837
0
    inend= in + *inlen;
838
0
    outend = out + (*outlen / 2);
839
0
    while (in < inend) {
840
0
      d= *in++;
841
0
      if      (d < 0x80)  { c= d; trailing= 0; }
842
0
      else if (d < 0xC0)  {
843
          /* trailing byte in leading position */
844
0
    *outlen = out - outstart;
845
0
    *inlen = processed - instart;
846
0
    return(XML_ENC_ERR_INPUT);
847
0
      } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
848
0
      else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
849
0
      else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
850
0
      else {
851
          /* no chance for this in UTF-16 */
852
0
    *outlen = out - outstart;
853
0
    *inlen = processed - instart;
854
0
    return(XML_ENC_ERR_INPUT);
855
0
      }
856
857
0
      if (inend - in < trailing) {
858
0
          break;
859
0
      }
860
861
0
      for ( ; trailing; trailing--) {
862
0
          if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))  break;
863
0
          c <<= 6;
864
0
          c |= d & 0x3F;
865
0
      }
866
867
      /* assertion: c is a single UTF-4 value */
868
0
        if (c < 0x10000) {
869
0
            if (out >= outend)  break;
870
0
      if (xmlLittleEndian) {
871
0
    tmp = (unsigned char *) out;
872
0
    *tmp = c >> 8;
873
0
    *(tmp + 1) = (unsigned char) c; /* Explicit truncation */
874
0
    out++;
875
0
      } else {
876
0
    *out++ = c;
877
0
      }
878
0
        }
879
0
        else if (c < 0x110000) {
880
0
            if (out+1 >= outend)  break;
881
0
            c -= 0x10000;
882
0
      if (xmlLittleEndian) {
883
0
    tmp1 = 0xD800 | (c >> 10);
884
0
    tmp = (unsigned char *) out;
885
0
    *tmp = tmp1 >> 8;
886
0
    *(tmp + 1) = (unsigned char) tmp1; /* Explicit truncation */
887
0
    out++;
888
889
0
    tmp2 = 0xDC00 | (c & 0x03FF);
890
0
    tmp = (unsigned char *) out;
891
0
    *tmp = tmp2 >> 8;
892
0
    *(tmp + 1) = (unsigned char) tmp2; /* Explicit truncation */
893
0
    out++;
894
0
      } else {
895
0
    *out++ = 0xD800 | (c >> 10);
896
0
    *out++ = 0xDC00 | (c & 0x03FF);
897
0
      }
898
0
        }
899
0
        else
900
0
      break;
901
0
  processed = in;
902
0
    }
903
0
    *outlen = (out - outstart) * 2;
904
0
    *inlen = processed - instart;
905
0
    return(*outlen);
906
0
}
907
#endif /* LIBXML_OUTPUT_ENABLED */
908
909
/************************************************************************
910
 *                  *
911
 *    Generic encoding handling routines      *
912
 *                  *
913
 ************************************************************************/
914
915
/**
916
 * xmlDetectCharEncoding:
917
 * @in:  a pointer to the first bytes of the XML entity, must be at least
918
 *       2 bytes long (at least 4 if encoding is UTF4 variant).
919
 * @len:  pointer to the length of the buffer
920
 *
921
 * Guess the encoding of the entity using the first bytes of the entity content
922
 * according to the non-normative appendix F of the XML-1.0 recommendation.
923
 *
924
 * Returns one of the XML_CHAR_ENCODING_... values.
925
 */
926
xmlCharEncoding
927
xmlDetectCharEncoding(const unsigned char* in, int len)
928
406
{
929
406
    if (in == NULL)
930
0
        return(XML_CHAR_ENCODING_NONE);
931
406
    if (len >= 4) {
932
406
  if ((in[0] == 0x00) && (in[1] == 0x00) &&
933
406
      (in[2] == 0x00) && (in[3] == 0x3C))
934
0
      return(XML_CHAR_ENCODING_UCS4BE);
935
406
  if ((in[0] == 0x3C) && (in[1] == 0x00) &&
936
406
      (in[2] == 0x00) && (in[3] == 0x00))
937
0
      return(XML_CHAR_ENCODING_UCS4LE);
938
406
  if ((in[0] == 0x00) && (in[1] == 0x00) &&
939
406
      (in[2] == 0x3C) && (in[3] == 0x00))
940
0
      return(XML_CHAR_ENCODING_UCS4_2143);
941
406
  if ((in[0] == 0x00) && (in[1] == 0x3C) &&
942
406
      (in[2] == 0x00) && (in[3] == 0x00))
943
0
      return(XML_CHAR_ENCODING_UCS4_3412);
944
406
  if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
945
406
      (in[2] == 0xA7) && (in[3] == 0x94))
946
0
      return(XML_CHAR_ENCODING_EBCDIC);
947
406
  if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
948
406
      (in[2] == 0x78) && (in[3] == 0x6D))
949
98
      return(XML_CHAR_ENCODING_UTF8);
950
  /*
951
   * Although not part of the recommendation, we also
952
   * attempt an "auto-recognition" of UTF-16LE and
953
   * UTF-16BE encodings.
954
   */
955
308
  if ((in[0] == 0x3C) && (in[1] == 0x00) &&
956
308
      (in[2] == 0x3F) && (in[3] == 0x00))
957
19
      return(XML_CHAR_ENCODING_UTF16LE);
958
289
  if ((in[0] == 0x00) && (in[1] == 0x3C) &&
959
289
      (in[2] == 0x00) && (in[3] == 0x3F))
960
0
      return(XML_CHAR_ENCODING_UTF16BE);
961
289
    }
962
289
    if (len >= 3) {
963
  /*
964
   * Errata on XML-1.0 June 20 2001
965
   * We now allow an UTF8 encoded BOM
966
   */
967
289
  if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
968
289
      (in[2] == 0xBF))
969
0
      return(XML_CHAR_ENCODING_UTF8);
970
289
    }
971
    /* For UTF-16 we can recognize by the BOM */
972
289
    if (len >= 2) {
973
289
  if ((in[0] == 0xFE) && (in[1] == 0xFF))
974
24
      return(XML_CHAR_ENCODING_UTF16BE);
975
265
  if ((in[0] == 0xFF) && (in[1] == 0xFE))
976
8
      return(XML_CHAR_ENCODING_UTF16LE);
977
265
    }
978
257
    return(XML_CHAR_ENCODING_NONE);
979
289
}
980
981
/**
982
 * xmlCleanupEncodingAliases:
983
 *
984
 * Unregisters all aliases
985
 */
986
void
987
0
xmlCleanupEncodingAliases(void) {
988
0
    int i;
989
990
0
    if (xmlCharEncodingAliases == NULL)
991
0
  return;
992
993
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
994
0
  if (xmlCharEncodingAliases[i].name != NULL)
995
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
996
0
  if (xmlCharEncodingAliases[i].alias != NULL)
997
0
      xmlFree((char *) xmlCharEncodingAliases[i].alias);
998
0
    }
999
0
    xmlCharEncodingAliasesNb = 0;
1000
0
    xmlCharEncodingAliasesMax = 0;
1001
0
    xmlFree(xmlCharEncodingAliases);
1002
0
    xmlCharEncodingAliases = NULL;
1003
0
}
1004
1005
/**
1006
 * xmlGetEncodingAlias:
1007
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1008
 *
1009
 * Lookup an encoding name for the given alias.
1010
 *
1011
 * Returns NULL if not found, otherwise the original name
1012
 */
1013
const char *
1014
88
xmlGetEncodingAlias(const char *alias) {
1015
88
    int i;
1016
88
    char upper[100];
1017
1018
88
    if (alias == NULL)
1019
0
  return(NULL);
1020
1021
88
    if (xmlCharEncodingAliases == NULL)
1022
88
  return(NULL);
1023
1024
0
    for (i = 0;i < 99;i++) {
1025
0
        upper[i] = (char) toupper((unsigned char) alias[i]);
1026
0
  if (upper[i] == 0) break;
1027
0
    }
1028
0
    upper[i] = 0;
1029
1030
    /*
1031
     * Walk down the list looking for a definition of the alias
1032
     */
1033
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1034
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1035
0
      return(xmlCharEncodingAliases[i].name);
1036
0
  }
1037
0
    }
1038
0
    return(NULL);
1039
0
}
1040
1041
/**
1042
 * xmlAddEncodingAlias:
1043
 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1044
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1045
 *
1046
 * Registers an alias @alias for an encoding named @name. Existing alias
1047
 * will be overwritten.
1048
 *
1049
 * Returns 0 in case of success, -1 in case of error
1050
 */
1051
int
1052
0
xmlAddEncodingAlias(const char *name, const char *alias) {
1053
0
    int i;
1054
0
    char upper[100];
1055
0
    char *nameCopy, *aliasCopy;
1056
1057
0
    if ((name == NULL) || (alias == NULL))
1058
0
  return(-1);
1059
1060
0
    for (i = 0;i < 99;i++) {
1061
0
        upper[i] = (char) toupper((unsigned char) alias[i]);
1062
0
  if (upper[i] == 0) break;
1063
0
    }
1064
0
    upper[i] = 0;
1065
1066
0
    if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1067
0
        xmlCharEncodingAliasPtr tmp;
1068
0
        size_t newSize = xmlCharEncodingAliasesMax ?
1069
0
                         xmlCharEncodingAliasesMax * 2 :
1070
0
                         20;
1071
1072
0
        tmp = (xmlCharEncodingAliasPtr)
1073
0
              xmlRealloc(xmlCharEncodingAliases,
1074
0
                         newSize * sizeof(xmlCharEncodingAlias));
1075
0
        if (tmp == NULL)
1076
0
            return(-1);
1077
0
        xmlCharEncodingAliases = tmp;
1078
0
        xmlCharEncodingAliasesMax = newSize;
1079
0
    }
1080
1081
    /*
1082
     * Walk down the list looking for a definition of the alias
1083
     */
1084
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1085
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1086
      /*
1087
       * Replace the definition.
1088
       */
1089
0
      nameCopy = xmlMemStrdup(name);
1090
0
            if (nameCopy == NULL)
1091
0
                return(-1);
1092
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1093
0
      xmlCharEncodingAliases[i].name = nameCopy;
1094
0
      return(0);
1095
0
  }
1096
0
    }
1097
    /*
1098
     * Add the definition
1099
     */
1100
0
    nameCopy = xmlMemStrdup(name);
1101
0
    if (nameCopy == NULL)
1102
0
        return(-1);
1103
0
    aliasCopy = xmlMemStrdup(upper);
1104
0
    if (aliasCopy == NULL) {
1105
0
        xmlFree(nameCopy);
1106
0
        return(-1);
1107
0
    }
1108
0
    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = nameCopy;
1109
0
    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = aliasCopy;
1110
0
    xmlCharEncodingAliasesNb++;
1111
0
    return(0);
1112
0
}
1113
1114
/**
1115
 * xmlDelEncodingAlias:
1116
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1117
 *
1118
 * Unregisters an encoding alias @alias
1119
 *
1120
 * Returns 0 in case of success, -1 in case of error
1121
 */
1122
int
1123
0
xmlDelEncodingAlias(const char *alias) {
1124
0
    int i;
1125
1126
0
    if (alias == NULL)
1127
0
  return(-1);
1128
1129
0
    if (xmlCharEncodingAliases == NULL)
1130
0
  return(-1);
1131
    /*
1132
     * Walk down the list looking for a definition of the alias
1133
     */
1134
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1135
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1136
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1137
0
      xmlFree((char *) xmlCharEncodingAliases[i].alias);
1138
0
      xmlCharEncodingAliasesNb--;
1139
0
      memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1140
0
        sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1141
0
      return(0);
1142
0
  }
1143
0
    }
1144
0
    return(-1);
1145
0
}
1146
1147
/**
1148
 * xmlParseCharEncoding:
1149
 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1150
 *
1151
 * Compare the string to the encoding schemes already known. Note
1152
 * that the comparison is case insensitive accordingly to the section
1153
 * [XML] 4.3.3 Character Encoding in Entities.
1154
 *
1155
 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1156
 * if not recognized.
1157
 */
1158
xmlCharEncoding
1159
xmlParseCharEncoding(const char* name)
1160
0
{
1161
0
    const char *alias;
1162
0
    char upper[500];
1163
0
    int i;
1164
1165
0
    if (name == NULL)
1166
0
  return(XML_CHAR_ENCODING_NONE);
1167
1168
    /*
1169
     * Do the alias resolution
1170
     */
1171
0
    alias = xmlGetEncodingAlias(name);
1172
0
    if (alias != NULL)
1173
0
  name = alias;
1174
1175
0
    for (i = 0;i < 499;i++) {
1176
0
        upper[i] = (char) toupper((unsigned char) name[i]);
1177
0
  if (upper[i] == 0) break;
1178
0
    }
1179
0
    upper[i] = 0;
1180
1181
0
    if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1182
0
    if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1183
0
    if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1184
1185
    /*
1186
     * NOTE: if we were able to parse this, the endianness of UTF16 is
1187
     *       already found and in use
1188
     */
1189
0
    if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1190
0
    if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1191
1192
0
    if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1193
0
    if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1194
0
    if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1195
1196
    /*
1197
     * NOTE: if we were able to parse this, the endianness of UCS4 is
1198
     *       already found and in use
1199
     */
1200
0
    if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1201
0
    if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1202
0
    if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1203
1204
1205
0
    if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1206
0
    if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1207
0
    if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1208
1209
0
    if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1210
0
    if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1211
0
    if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1212
1213
0
    if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1214
0
    if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1215
0
    if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1216
0
    if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1217
0
    if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1218
0
    if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1219
0
    if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1220
1221
0
    if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1222
0
    if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1223
0
    if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1224
1225
#ifdef DEBUG_ENCODING
1226
    xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1227
#endif
1228
0
    return(XML_CHAR_ENCODING_ERROR);
1229
0
}
1230
1231
/**
1232
 * xmlGetCharEncodingName:
1233
 * @enc:  the encoding
1234
 *
1235
 * The "canonical" name for XML encoding.
1236
 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1237
 * Section 4.3.3  Character Encoding in Entities
1238
 *
1239
 * Returns the canonical name for the given encoding
1240
 */
1241
1242
const char*
1243
0
xmlGetCharEncodingName(xmlCharEncoding enc) {
1244
0
    switch (enc) {
1245
0
        case XML_CHAR_ENCODING_ERROR:
1246
0
      return(NULL);
1247
0
        case XML_CHAR_ENCODING_NONE:
1248
0
      return(NULL);
1249
0
        case XML_CHAR_ENCODING_UTF8:
1250
0
      return("UTF-8");
1251
0
        case XML_CHAR_ENCODING_UTF16LE:
1252
0
      return("UTF-16");
1253
0
        case XML_CHAR_ENCODING_UTF16BE:
1254
0
      return("UTF-16");
1255
0
        case XML_CHAR_ENCODING_EBCDIC:
1256
0
            return("EBCDIC");
1257
0
        case XML_CHAR_ENCODING_UCS4LE:
1258
0
            return("ISO-10646-UCS-4");
1259
0
        case XML_CHAR_ENCODING_UCS4BE:
1260
0
            return("ISO-10646-UCS-4");
1261
0
        case XML_CHAR_ENCODING_UCS4_2143:
1262
0
            return("ISO-10646-UCS-4");
1263
0
        case XML_CHAR_ENCODING_UCS4_3412:
1264
0
            return("ISO-10646-UCS-4");
1265
0
        case XML_CHAR_ENCODING_UCS2:
1266
0
            return("ISO-10646-UCS-2");
1267
0
        case XML_CHAR_ENCODING_8859_1:
1268
0
      return("ISO-8859-1");
1269
0
        case XML_CHAR_ENCODING_8859_2:
1270
0
      return("ISO-8859-2");
1271
0
        case XML_CHAR_ENCODING_8859_3:
1272
0
      return("ISO-8859-3");
1273
0
        case XML_CHAR_ENCODING_8859_4:
1274
0
      return("ISO-8859-4");
1275
0
        case XML_CHAR_ENCODING_8859_5:
1276
0
      return("ISO-8859-5");
1277
0
        case XML_CHAR_ENCODING_8859_6:
1278
0
      return("ISO-8859-6");
1279
0
        case XML_CHAR_ENCODING_8859_7:
1280
0
      return("ISO-8859-7");
1281
0
        case XML_CHAR_ENCODING_8859_8:
1282
0
      return("ISO-8859-8");
1283
0
        case XML_CHAR_ENCODING_8859_9:
1284
0
      return("ISO-8859-9");
1285
0
        case XML_CHAR_ENCODING_2022_JP:
1286
0
            return("ISO-2022-JP");
1287
0
        case XML_CHAR_ENCODING_SHIFT_JIS:
1288
0
            return("Shift-JIS");
1289
0
        case XML_CHAR_ENCODING_EUC_JP:
1290
0
            return("EUC-JP");
1291
0
  case XML_CHAR_ENCODING_ASCII:
1292
0
      return(NULL);
1293
0
    }
1294
0
    return(NULL);
1295
0
}
1296
1297
/************************************************************************
1298
 *                  *
1299
 *      Char encoding handlers        *
1300
 *                  *
1301
 ************************************************************************/
1302
1303
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) && \
1304
    defined(LIBXML_ISO8859X_ENABLED)
1305
1306
#define DECLARE_ISO_FUNCS(n) \
1307
    static int ISO8859_##n##ToUTF8(unsigned char* out, int *outlen, \
1308
                                   const unsigned char* in, int *inlen); \
1309
    static int UTF8ToISO8859_##n(unsigned char* out, int *outlen, \
1310
                                 const unsigned char* in, int *inlen);
1311
1312
/** DOC_DISABLE */
1313
DECLARE_ISO_FUNCS(2)
1314
DECLARE_ISO_FUNCS(3)
1315
DECLARE_ISO_FUNCS(4)
1316
DECLARE_ISO_FUNCS(5)
1317
DECLARE_ISO_FUNCS(6)
1318
DECLARE_ISO_FUNCS(7)
1319
DECLARE_ISO_FUNCS(8)
1320
DECLARE_ISO_FUNCS(9)
1321
DECLARE_ISO_FUNCS(10)
1322
DECLARE_ISO_FUNCS(11)
1323
DECLARE_ISO_FUNCS(13)
1324
DECLARE_ISO_FUNCS(14)
1325
DECLARE_ISO_FUNCS(15)
1326
DECLARE_ISO_FUNCS(16)
1327
/** DOC_ENABLE */
1328
1329
#endif /* LIBXML_ISO8859X_ENABLED */
1330
1331
#ifdef LIBXML_ICONV_ENABLED
1332
  #define EMPTY_ICONV , (iconv_t) 0, (iconv_t) 0
1333
#else
1334
  #define EMPTY_ICONV
1335
#endif
1336
1337
#ifdef LIBXML_ICU_ENABLED
1338
  #define EMPTY_UCONV , NULL, NULL
1339
#else
1340
  #define EMPTY_UCONV
1341
#endif
1342
1343
#define MAKE_HANDLER(name, in, out) \
1344
    { (char *) name, in, out EMPTY_ICONV EMPTY_UCONV }
1345
1346
static const xmlCharEncodingHandler defaultHandlers[] = {
1347
    MAKE_HANDLER("UTF-8", UTF8ToUTF8, UTF8ToUTF8)
1348
#ifdef LIBXML_OUTPUT_ENABLED
1349
    ,MAKE_HANDLER("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE)
1350
    ,MAKE_HANDLER("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE)
1351
    ,MAKE_HANDLER("UTF-16", UTF16LEToUTF8, UTF8ToUTF16)
1352
    ,MAKE_HANDLER("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1)
1353
    ,MAKE_HANDLER("ASCII", asciiToUTF8, UTF8Toascii)
1354
    ,MAKE_HANDLER("US-ASCII", asciiToUTF8, UTF8Toascii)
1355
#ifdef LIBXML_HTML_ENABLED
1356
    ,MAKE_HANDLER("HTML", NULL, UTF8ToHtml)
1357
#endif
1358
#else
1359
    ,MAKE_HANDLER("UTF-16LE", UTF16LEToUTF8, NULL)
1360
    ,MAKE_HANDLER("UTF-16BE", UTF16BEToUTF8, NULL)
1361
    ,MAKE_HANDLER("UTF-16", UTF16LEToUTF8, NULL)
1362
    ,MAKE_HANDLER("ISO-8859-1", isolat1ToUTF8, NULL)
1363
    ,MAKE_HANDLER("ASCII", asciiToUTF8, NULL)
1364
    ,MAKE_HANDLER("US-ASCII", asciiToUTF8, NULL)
1365
#endif /* LIBXML_OUTPUT_ENABLED */
1366
1367
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) && \
1368
    defined(LIBXML_ISO8859X_ENABLED)
1369
    ,MAKE_HANDLER("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2)
1370
    ,MAKE_HANDLER("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3)
1371
    ,MAKE_HANDLER("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4)
1372
    ,MAKE_HANDLER("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5)
1373
    ,MAKE_HANDLER("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6)
1374
    ,MAKE_HANDLER("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7)
1375
    ,MAKE_HANDLER("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8)
1376
    ,MAKE_HANDLER("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9)
1377
    ,MAKE_HANDLER("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10)
1378
    ,MAKE_HANDLER("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11)
1379
    ,MAKE_HANDLER("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13)
1380
    ,MAKE_HANDLER("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14)
1381
    ,MAKE_HANDLER("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15)
1382
    ,MAKE_HANDLER("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16)
1383
#endif
1384
};
1385
1386
#define NUM_DEFAULT_HANDLERS \
1387
1.40k
    (sizeof(defaultHandlers) / sizeof(defaultHandlers[0]))
1388
1389
static const xmlCharEncodingHandler *xmlUTF16LEHandler = &defaultHandlers[1];
1390
static const xmlCharEncodingHandler *xmlUTF16BEHandler = &defaultHandlers[2];
1391
1392
/* the size should be growable, but it's not a big deal ... */
1393
0
#define MAX_ENCODING_HANDLERS 50
1394
static xmlCharEncodingHandlerPtr *handlers = NULL;
1395
static int nbCharEncodingHandler = 0;
1396
1397
/**
1398
 * xmlNewCharEncodingHandler:
1399
 * @name:  the encoding name, in UTF-8 format (ASCII actually)
1400
 * @input:  the xmlCharEncodingInputFunc to read that encoding
1401
 * @output:  the xmlCharEncodingOutputFunc to write that encoding
1402
 *
1403
 * Create and registers an xmlCharEncodingHandler.
1404
 *
1405
 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1406
 */
1407
xmlCharEncodingHandlerPtr
1408
xmlNewCharEncodingHandler(const char *name,
1409
                          xmlCharEncodingInputFunc input,
1410
0
                          xmlCharEncodingOutputFunc output) {
1411
0
    xmlCharEncodingHandlerPtr handler;
1412
0
    const char *alias;
1413
0
    char upper[500];
1414
0
    int i;
1415
0
    char *up = NULL;
1416
1417
    /*
1418
     * Do the alias resolution
1419
     */
1420
0
    alias = xmlGetEncodingAlias(name);
1421
0
    if (alias != NULL)
1422
0
  name = alias;
1423
1424
    /*
1425
     * Keep only the uppercase version of the encoding.
1426
     */
1427
0
    if (name == NULL)
1428
0
  return(NULL);
1429
0
    for (i = 0;i < 499;i++) {
1430
0
        upper[i] = (char) toupper((unsigned char) name[i]);
1431
0
  if (upper[i] == 0) break;
1432
0
    }
1433
0
    upper[i] = 0;
1434
0
    up = xmlMemStrdup(upper);
1435
0
    if (up == NULL)
1436
0
  return(NULL);
1437
1438
    /*
1439
     * allocate and fill-up an handler block.
1440
     */
1441
0
    handler = (xmlCharEncodingHandlerPtr)
1442
0
              xmlMalloc(sizeof(xmlCharEncodingHandler));
1443
0
    if (handler == NULL) {
1444
0
        xmlFree(up);
1445
0
  return(NULL);
1446
0
    }
1447
0
    memset(handler, 0, sizeof(xmlCharEncodingHandler));
1448
0
    handler->input = input;
1449
0
    handler->output = output;
1450
0
    handler->name = up;
1451
1452
0
#ifdef LIBXML_ICONV_ENABLED
1453
0
    handler->iconv_in = NULL;
1454
0
    handler->iconv_out = NULL;
1455
0
#endif
1456
#ifdef LIBXML_ICU_ENABLED
1457
    handler->uconv_in = NULL;
1458
    handler->uconv_out = NULL;
1459
#endif
1460
1461
    /*
1462
     * registers and returns the handler.
1463
     */
1464
0
    xmlRegisterCharEncodingHandler(handler);
1465
#ifdef DEBUG_ENCODING
1466
    xmlGenericError(xmlGenericErrorContext,
1467
      "Registered encoding handler for %s\n", name);
1468
#endif
1469
0
    return(handler);
1470
0
}
1471
1472
/**
 * xmlInitCharEncodingHandlers:
 *
 * DEPRECATED: Alias for xmlInitParser.
 *
 * Does nothing but forward to xmlInitParser().
 */
void
xmlInitCharEncodingHandlers(void)
{
    xmlInitParser();
}
1481
1482
/**
1483
 * xmlInitEncodingInternal:
1484
 *
1485
 * Initialize the char encoding support.
1486
 */
1487
void
1488
2
xmlInitEncodingInternal(void) {
1489
2
    unsigned short int tst = 0x1234;
1490
2
    unsigned char *ptr = (unsigned char *) &tst;
1491
1492
2
    if (*ptr == 0x12) xmlLittleEndian = 0;
1493
2
    else xmlLittleEndian = 1;
1494
2
}
1495
1496
/**
1497
 * xmlCleanupCharEncodingHandlers:
1498
 *
1499
 * DEPRECATED: This function will be made private. Call xmlCleanupParser
1500
 * to free global state but see the warnings there. xmlCleanupParser
1501
 * should be only called once at program exit. In most cases, you don't
1502
 * have call cleanup functions at all.
1503
 *
1504
 * Cleanup the memory allocated for the char encoding support, it
1505
 * unregisters all the encoding handlers and the aliases.
1506
 */
1507
void
1508
0
xmlCleanupCharEncodingHandlers(void) {
1509
0
    xmlCleanupEncodingAliases();
1510
1511
0
    if (handlers == NULL) return;
1512
1513
0
    for (;nbCharEncodingHandler > 0;) {
1514
0
        nbCharEncodingHandler--;
1515
0
  if (handlers[nbCharEncodingHandler] != NULL) {
1516
0
      if (handlers[nbCharEncodingHandler]->name != NULL)
1517
0
    xmlFree(handlers[nbCharEncodingHandler]->name);
1518
0
      xmlFree(handlers[nbCharEncodingHandler]);
1519
0
  }
1520
0
    }
1521
0
    xmlFree(handlers);
1522
0
    handlers = NULL;
1523
0
    nbCharEncodingHandler = 0;
1524
0
}
1525
1526
/**
1527
 * xmlRegisterCharEncodingHandler:
1528
 * @handler:  the xmlCharEncodingHandlerPtr handler block
1529
 *
1530
 * Register the char encoding handler, surprising, isn't it ?
1531
 */
1532
void
1533
0
xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1534
0
    if (handler == NULL)
1535
0
        return;
1536
0
    if (handlers == NULL) {
1537
0
        handlers = xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(handlers[0]));
1538
0
        if (handlers == NULL)
1539
0
            goto free_handler;
1540
0
    }
1541
1542
0
    if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS)
1543
0
        goto free_handler;
1544
0
    handlers[nbCharEncodingHandler++] = handler;
1545
0
    return;
1546
1547
0
free_handler:
1548
0
    if (handler != NULL) {
1549
0
        if (handler->name != NULL) {
1550
0
            xmlFree(handler->name);
1551
0
        }
1552
0
        xmlFree(handler);
1553
0
    }
1554
0
}
1555
1556
/**
1557
 * xmlGetCharEncodingHandler:
1558
 * @enc:  an xmlCharEncoding value.
1559
 *
1560
 * Search in the registered set the handler able to read/write that encoding.
1561
 *
1562
 * Returns the handler or NULL if not found
1563
 */
1564
xmlCharEncodingHandlerPtr
1565
461
xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1566
461
    xmlCharEncodingHandlerPtr handler;
1567
1568
461
    switch (enc) {
1569
0
        case XML_CHAR_ENCODING_ERROR:
1570
0
      return(NULL);
1571
410
        case XML_CHAR_ENCODING_NONE:
1572
410
      return(NULL);
1573
0
        case XML_CHAR_ENCODING_UTF8:
1574
0
      return(NULL);
1575
27
        case XML_CHAR_ENCODING_UTF16LE:
1576
27
      return((xmlCharEncodingHandlerPtr) xmlUTF16LEHandler);
1577
24
        case XML_CHAR_ENCODING_UTF16BE:
1578
24
      return((xmlCharEncodingHandlerPtr) xmlUTF16BEHandler);
1579
0
        case XML_CHAR_ENCODING_EBCDIC:
1580
0
            handler = xmlFindCharEncodingHandler("EBCDIC");
1581
0
            if (handler != NULL) return(handler);
1582
0
            handler = xmlFindCharEncodingHandler("ebcdic");
1583
0
            if (handler != NULL) return(handler);
1584
0
            handler = xmlFindCharEncodingHandler("EBCDIC-US");
1585
0
            if (handler != NULL) return(handler);
1586
0
            handler = xmlFindCharEncodingHandler("IBM-037");
1587
0
            if (handler != NULL) return(handler);
1588
0
      break;
1589
0
        case XML_CHAR_ENCODING_UCS4BE:
1590
0
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1591
0
            if (handler != NULL) return(handler);
1592
0
            handler = xmlFindCharEncodingHandler("UCS-4");
1593
0
            if (handler != NULL) return(handler);
1594
0
            handler = xmlFindCharEncodingHandler("UCS4");
1595
0
            if (handler != NULL) return(handler);
1596
0
      break;
1597
0
        case XML_CHAR_ENCODING_UCS4LE:
1598
0
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1599
0
            if (handler != NULL) return(handler);
1600
0
            handler = xmlFindCharEncodingHandler("UCS-4");
1601
0
            if (handler != NULL) return(handler);
1602
0
            handler = xmlFindCharEncodingHandler("UCS4");
1603
0
            if (handler != NULL) return(handler);
1604
0
      break;
1605
0
        case XML_CHAR_ENCODING_UCS4_2143:
1606
0
      break;
1607
0
        case XML_CHAR_ENCODING_UCS4_3412:
1608
0
      break;
1609
0
        case XML_CHAR_ENCODING_UCS2:
1610
0
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1611
0
            if (handler != NULL) return(handler);
1612
0
            handler = xmlFindCharEncodingHandler("UCS-2");
1613
0
            if (handler != NULL) return(handler);
1614
0
            handler = xmlFindCharEncodingHandler("UCS2");
1615
0
            if (handler != NULL) return(handler);
1616
0
      break;
1617
1618
      /*
1619
       * We used to keep ISO Latin encodings native in the
1620
       * generated data. This led to so many problems that
1621
       * this has been removed. One can still change this
1622
       * back by registering no-ops encoders for those
1623
       */
1624
0
        case XML_CHAR_ENCODING_8859_1:
1625
0
      handler = xmlFindCharEncodingHandler("ISO-8859-1");
1626
0
      if (handler != NULL) return(handler);
1627
0
      break;
1628
0
        case XML_CHAR_ENCODING_8859_2:
1629
0
      handler = xmlFindCharEncodingHandler("ISO-8859-2");
1630
0
      if (handler != NULL) return(handler);
1631
0
      break;
1632
0
        case XML_CHAR_ENCODING_8859_3:
1633
0
      handler = xmlFindCharEncodingHandler("ISO-8859-3");
1634
0
      if (handler != NULL) return(handler);
1635
0
      break;
1636
0
        case XML_CHAR_ENCODING_8859_4:
1637
0
      handler = xmlFindCharEncodingHandler("ISO-8859-4");
1638
0
      if (handler != NULL) return(handler);
1639
0
      break;
1640
0
        case XML_CHAR_ENCODING_8859_5:
1641
0
      handler = xmlFindCharEncodingHandler("ISO-8859-5");
1642
0
      if (handler != NULL) return(handler);
1643
0
      break;
1644
0
        case XML_CHAR_ENCODING_8859_6:
1645
0
      handler = xmlFindCharEncodingHandler("ISO-8859-6");
1646
0
      if (handler != NULL) return(handler);
1647
0
      break;
1648
0
        case XML_CHAR_ENCODING_8859_7:
1649
0
      handler = xmlFindCharEncodingHandler("ISO-8859-7");
1650
0
      if (handler != NULL) return(handler);
1651
0
      break;
1652
0
        case XML_CHAR_ENCODING_8859_8:
1653
0
      handler = xmlFindCharEncodingHandler("ISO-8859-8");
1654
0
      if (handler != NULL) return(handler);
1655
0
      break;
1656
0
        case XML_CHAR_ENCODING_8859_9:
1657
0
      handler = xmlFindCharEncodingHandler("ISO-8859-9");
1658
0
      if (handler != NULL) return(handler);
1659
0
      break;
1660
1661
1662
0
        case XML_CHAR_ENCODING_2022_JP:
1663
0
            handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1664
0
            if (handler != NULL) return(handler);
1665
0
      break;
1666
0
        case XML_CHAR_ENCODING_SHIFT_JIS:
1667
0
            handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1668
0
            if (handler != NULL) return(handler);
1669
0
            handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1670
0
            if (handler != NULL) return(handler);
1671
0
            handler = xmlFindCharEncodingHandler("Shift_JIS");
1672
0
            if (handler != NULL) return(handler);
1673
0
      break;
1674
0
        case XML_CHAR_ENCODING_EUC_JP:
1675
0
            handler = xmlFindCharEncodingHandler("EUC-JP");
1676
0
            if (handler != NULL) return(handler);
1677
0
      break;
1678
0
  default:
1679
0
      break;
1680
461
    }
1681
1682
#ifdef DEBUG_ENCODING
1683
    xmlGenericError(xmlGenericErrorContext,
1684
      "No handler found for encoding %d\n", enc);
1685
#endif
1686
0
    return(NULL);
1687
461
}
1688
1689
/**
1690
 * xmlFindCharEncodingHandler:
1691
 * @name:  a string describing the char encoding.
1692
 *
1693
 * Search in the registered set the handler able to read/write that encoding
1694
 * or create a new one.
1695
 *
1696
 * Returns the handler or NULL if not found
1697
 */
1698
xmlCharEncodingHandlerPtr
1699
88
xmlFindCharEncodingHandler(const char *name) {
1700
88
    const char *nalias;
1701
88
    const char *norig;
1702
88
    xmlCharEncoding alias;
1703
88
#ifdef LIBXML_ICONV_ENABLED
1704
88
    xmlCharEncodingHandlerPtr enc;
1705
88
    iconv_t icv_in, icv_out;
1706
88
#endif /* LIBXML_ICONV_ENABLED */
1707
#ifdef LIBXML_ICU_ENABLED
1708
    xmlCharEncodingHandlerPtr encu;
1709
    uconv_t *ucv_in, *ucv_out;
1710
#endif /* LIBXML_ICU_ENABLED */
1711
88
    char upper[100];
1712
88
    int i;
1713
1714
88
    if (name == NULL) return(NULL);
1715
88
    if (name[0] == 0) return(NULL);
1716
1717
    /*
1718
     * Do the alias resolution
1719
     */
1720
88
    norig = name;
1721
88
    nalias = xmlGetEncodingAlias(name);
1722
88
    if (nalias != NULL)
1723
0
  name = nalias;
1724
1725
    /*
1726
     * Check first for directly registered encoding names
1727
     */
1728
1.01k
    for (i = 0;i < 99;i++) {
1729
1.01k
        upper[i] = (char) toupper((unsigned char) name[i]);
1730
1.01k
  if (upper[i] == 0) break;
1731
1.01k
    }
1732
88
    upper[i] = 0;
1733
1734
640
    for (i = 0; i < (int) NUM_DEFAULT_HANDLERS; i++) {
1735
591
        if (strcmp(upper, defaultHandlers[i].name) == 0)
1736
39
            return((xmlCharEncodingHandlerPtr) &defaultHandlers[i]);
1737
591
    }
1738
1739
49
    if (handlers != NULL) {
1740
0
        for (i = 0;i < nbCharEncodingHandler; i++) {
1741
0
            if (!strcmp(upper, handlers[i]->name)) {
1742
#ifdef DEBUG_ENCODING
1743
                xmlGenericError(xmlGenericErrorContext,
1744
                        "Found registered handler for encoding %s\n", name);
1745
#endif
1746
0
                return(handlers[i]);
1747
0
            }
1748
0
        }
1749
0
    }
1750
1751
49
#ifdef LIBXML_ICONV_ENABLED
1752
    /* check whether iconv can handle this */
1753
49
    icv_in = iconv_open("UTF-8", name);
1754
49
    icv_out = iconv_open(name, "UTF-8");
1755
49
    if (icv_in == (iconv_t) -1) {
1756
0
        icv_in = iconv_open("UTF-8", upper);
1757
0
    }
1758
49
    if (icv_out == (iconv_t) -1) {
1759
0
  icv_out = iconv_open(upper, "UTF-8");
1760
0
    }
1761
49
    if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1762
49
      enc = (xmlCharEncodingHandlerPtr)
1763
49
            xmlMalloc(sizeof(xmlCharEncodingHandler));
1764
49
      if (enc == NULL) {
1765
0
          iconv_close(icv_in);
1766
0
          iconv_close(icv_out);
1767
0
    return(NULL);
1768
0
      }
1769
49
            memset(enc, 0, sizeof(xmlCharEncodingHandler));
1770
49
      enc->name = xmlMemStrdup(name);
1771
49
            if (enc->name == NULL) {
1772
0
                xmlFree(enc);
1773
0
                iconv_close(icv_in);
1774
0
                iconv_close(icv_out);
1775
0
                return(NULL);
1776
0
            }
1777
49
      enc->input = NULL;
1778
49
      enc->output = NULL;
1779
49
      enc->iconv_in = icv_in;
1780
49
      enc->iconv_out = icv_out;
1781
#ifdef DEBUG_ENCODING
1782
            xmlGenericError(xmlGenericErrorContext,
1783
        "Found iconv handler for encoding %s\n", name);
1784
#endif
1785
49
      return enc;
1786
49
    } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1787
0
      if (icv_in != (iconv_t) -1)
1788
0
    iconv_close(icv_in);
1789
0
      else
1790
0
    iconv_close(icv_out);
1791
0
    }
1792
0
#endif /* LIBXML_ICONV_ENABLED */
1793
#ifdef LIBXML_ICU_ENABLED
1794
    /* check whether icu can handle this */
1795
    ucv_in = openIcuConverter(name, 1);
1796
    ucv_out = openIcuConverter(name, 0);
1797
    if (ucv_in != NULL && ucv_out != NULL) {
1798
      encu = (xmlCharEncodingHandlerPtr)
1799
             xmlMalloc(sizeof(xmlCharEncodingHandler));
1800
      if (encu == NULL) {
1801
                closeIcuConverter(ucv_in);
1802
                closeIcuConverter(ucv_out);
1803
    return(NULL);
1804
      }
1805
            memset(encu, 0, sizeof(xmlCharEncodingHandler));
1806
      encu->name = xmlMemStrdup(name);
1807
            if (encu->name == NULL) {
1808
                xmlFree(encu);
1809
                closeIcuConverter(ucv_in);
1810
                closeIcuConverter(ucv_out);
1811
                return(NULL);
1812
            }
1813
      encu->input = NULL;
1814
      encu->output = NULL;
1815
      encu->uconv_in = ucv_in;
1816
      encu->uconv_out = ucv_out;
1817
#ifdef DEBUG_ENCODING
1818
            xmlGenericError(xmlGenericErrorContext,
1819
        "Found ICU converter handler for encoding %s\n", name);
1820
#endif
1821
      return encu;
1822
    } else if (ucv_in != NULL || ucv_out != NULL) {
1823
            closeIcuConverter(ucv_in);
1824
            closeIcuConverter(ucv_out);
1825
    }
1826
#endif /* LIBXML_ICU_ENABLED */
1827
1828
#ifdef DEBUG_ENCODING
1829
    xmlGenericError(xmlGenericErrorContext,
1830
      "No handler found for encoding %s\n", name);
1831
#endif
1832
1833
    /*
1834
     * Fallback using the canonical names
1835
     */
1836
0
    alias = xmlParseCharEncoding(norig);
1837
0
    if (alias != XML_CHAR_ENCODING_ERROR) {
1838
0
        const char* canon;
1839
0
        canon = xmlGetCharEncodingName(alias);
1840
0
        if ((canon != NULL) && (strcmp(name, canon))) {
1841
0
      return(xmlFindCharEncodingHandler(canon));
1842
0
        }
1843
0
    }
1844
1845
    /* If "none of the above", give up */
1846
0
    return(NULL);
1847
0
}
1848
1849
/************************************************************************
1850
 *                  *
1851
 *    ICONV based generic conversion functions    *
1852
 *                  *
1853
 ************************************************************************/
1854
1855
#ifdef LIBXML_ICONV_ENABLED
1856
/**
1857
 * xmlIconvWrapper:
1858
 * @cd:   iconv converter data structure
1859
 * @out:  a pointer to an array of bytes to store the result
1860
 * @outlen:  the length of @out
1861
 * @in:  a pointer to an array of input bytes
1862
 * @inlen:  the length of @in
1863
 *
1864
 * Returns an XML_ENC_ERR code.
1865
 *
1866
 * The value of @inlen after return is the number of octets consumed
1867
 *     as the return value is positive, else unpredictable.
1868
 * The value of @outlen after return is the number of octets produced.
1869
 */
1870
static int
1871
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
1872
107
                const unsigned char *in, int *inlen) {
1873
107
    size_t icv_inlen, icv_outlen;
1874
107
    const char *icv_in = (const char *) in;
1875
107
    char *icv_out = (char *) out;
1876
107
    size_t ret;
1877
1878
107
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1879
0
        if (outlen != NULL) *outlen = 0;
1880
0
        return(XML_ENC_ERR_INTERNAL);
1881
0
    }
1882
107
    icv_inlen = *inlen;
1883
107
    icv_outlen = *outlen;
1884
    /*
1885
     * Some versions take const, other versions take non-const input.
1886
     */
1887
107
    ret = iconv(cd, (void *) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
1888
107
    *inlen -= icv_inlen;
1889
107
    *outlen -= icv_outlen;
1890
107
    if (ret == (size_t) -1) {
1891
81
        if (errno == EILSEQ)
1892
37
            return(XML_ENC_ERR_INPUT);
1893
44
        if (errno == E2BIG)
1894
44
            return(XML_ENC_ERR_SPACE);
1895
0
        if (errno == EINVAL)
1896
0
            return(XML_ENC_ERR_PARTIAL);
1897
0
        return(XML_ENC_ERR_INTERNAL);
1898
0
    }
1899
26
    return(XML_ENC_ERR_SUCCESS);
1900
107
}
1901
#endif /* LIBXML_ICONV_ENABLED */
1902
1903
/************************************************************************
1904
 *                  *
1905
 *    ICU based generic conversion functions    *
1906
 *                  *
1907
 ************************************************************************/
1908
1909
#ifdef LIBXML_ICU_ENABLED
1910
/**
1911
 * xmlUconvWrapper:
1912
 * @cd: ICU uconverter data structure
1913
 * @toUnicode : non-zero if toUnicode. 0 otherwise.
1914
 * @out:  a pointer to an array of bytes to store the result
1915
 * @outlen:  the length of @out
1916
 * @in:  a pointer to an array of input bytes
1917
 * @inlen:  the length of @in
1918
 * @flush: if true, indicates end of input
1919
 *
1920
 * Returns an XML_ENC_ERR code.
1921
 *
1922
 * The value of @inlen after return is the number of octets consumed
1923
 *     as the return value is positive, else unpredictable.
1924
 * The value of @outlen after return is the number of octets produced.
1925
 */
1926
static int
1927
xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
1928
                const unsigned char *in, int *inlen, int flush) {
1929
    const char *ucv_in = (const char *) in;
1930
    char *ucv_out = (char *) out;
1931
    UErrorCode err = U_ZERO_ERROR;
1932
1933
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1934
        if (outlen != NULL) *outlen = 0;
1935
        return(XML_ENC_ERR_INTERNAL);
1936
    }
1937
1938
    if (toUnicode) {
1939
        /* encoding => UTF-16 => UTF-8 */
1940
        ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
1941
                       &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1942
                       &cd->pivot_source, &cd->pivot_target,
1943
                       cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1944
    } else {
1945
        /* UTF-8 => UTF-16 => encoding */
1946
        ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
1947
                       &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1948
                       &cd->pivot_source, &cd->pivot_target,
1949
                       cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1950
    }
1951
    *inlen = ucv_in - (const char*) in;
1952
    *outlen = ucv_out - (char *) out;
1953
    if (U_SUCCESS(err)) {
1954
        /* reset pivot buf if this is the last call for input (flush==TRUE) */
1955
        if (flush)
1956
            cd->pivot_source = cd->pivot_target = cd->pivot_buf;
1957
        return(XML_ENC_ERR_SUCCESS);
1958
    }
1959
    if (err == U_BUFFER_OVERFLOW_ERROR)
1960
        return(XML_ENC_ERR_SPACE);
1961
    if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
1962
        return(XML_ENC_ERR_INPUT);
1963
    return(XML_ENC_ERR_PARTIAL);
1964
}
1965
#endif /* LIBXML_ICU_ENABLED */
1966
1967
/************************************************************************
1968
 *                  *
1969
 *    The real API used by libxml for on-the-fly conversion *
1970
 *                  *
1971
 ************************************************************************/
1972
1973
/**
1974
 * xmlEncConvertError:
1975
 * @code:  XML_ENC_ERR code
1976
 *
1977
 * Convert XML_ENC_ERR to libxml2 error codes.
1978
 */
1979
static int
1980
19
xmlEncConvertError(int code) {
1981
19
    int ret;
1982
1983
19
    switch (code) {
1984
0
        case XML_ENC_ERR_SUCCESS:
1985
0
            ret = XML_ERR_OK;
1986
0
            break;
1987
19
        case XML_ENC_ERR_INPUT:
1988
19
            ret = XML_ERR_INVALID_ENCODING;
1989
19
            break;
1990
0
        case XML_ENC_ERR_MEMORY:
1991
0
            ret = XML_ERR_NO_MEMORY;
1992
0
            break;
1993
0
        default:
1994
0
            ret = XML_ERR_INTERNAL_ERROR;
1995
0
            break;
1996
19
    }
1997
1998
19
    return(ret);
1999
19
}
2000
2001
/**
2002
 * xmlEncInputChunk:
2003
 * @handler:  encoding handler
2004
 * @out:  a pointer to an array of bytes to store the result
2005
 * @outlen:  the length of @out
2006
 * @in:  a pointer to an array of input bytes
2007
 * @inlen:  the length of @in
2008
 * @flush:  flush (ICU-related)
2009
 *
2010
 * Returns an XML_ENC_ERR code.
2011
 *
2012
 * The value of @inlen after return is the number of octets consumed
2013
 *     as the return value is 0, else unpredictable.
2014
 * The value of @outlen after return is the number of octets produced.
2015
 */
2016
int
2017
xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2018
2.35k
                 int *outlen, const unsigned char *in, int *inlen, int flush) {
2019
2.35k
    int ret;
2020
2.35k
    (void)flush;
2021
2022
2.35k
    if (handler->input != NULL) {
2023
2.24k
        ret = handler->input(out, outlen, in, inlen);
2024
2.24k
        if (ret > 0)
2025
86
           ret = XML_ENC_ERR_SUCCESS;
2026
2.24k
    }
2027
107
#ifdef LIBXML_ICONV_ENABLED
2028
107
    else if (handler->iconv_in != NULL) {
2029
107
        ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
2030
107
    }
2031
0
#endif /* LIBXML_ICONV_ENABLED */
2032
#ifdef LIBXML_ICU_ENABLED
2033
    else if (handler->uconv_in != NULL) {
2034
        ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen,
2035
                              flush);
2036
    }
2037
#endif /* LIBXML_ICU_ENABLED */
2038
0
    else {
2039
0
        *outlen = 0;
2040
0
        *inlen = 0;
2041
0
        ret = XML_ENC_ERR_INTERNAL;
2042
0
    }
2043
2044
    /* Ignore space and partial errors when reading. */
2045
2.35k
    if ((ret == XML_ENC_ERR_SPACE) || (ret == XML_ENC_ERR_PARTIAL))
2046
44
        ret = XML_ENC_ERR_SUCCESS;
2047
2048
2.35k
    return(ret);
2049
2.35k
}
2050
2051
/**
2052
 * xmlEncOutputChunk:
2053
 * @handler:  encoding handler
2054
 * @out:  a pointer to an array of bytes to store the result
2055
 * @outlen:  the length of @out
2056
 * @in:  a pointer to an array of input bytes
2057
 * @inlen:  the length of @in
2058
 *
2059
 * Returns an XML_ENC_ERR code.
2060
 *
2061
 * The value of @inlen after return is the number of octets consumed
2062
 *     as the return value is 0, else unpredictable.
2063
 * The value of @outlen after return is the number of octets produced.
2064
 */
2065
static int
2066
xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2067
0
                  int *outlen, const unsigned char *in, int *inlen) {
2068
0
    int ret;
2069
2070
0
    if (handler->output != NULL) {
2071
0
        ret = handler->output(out, outlen, in, inlen);
2072
0
        if (ret > 0)
2073
0
           ret = XML_ENC_ERR_SUCCESS;
2074
0
    }
2075
0
#ifdef LIBXML_ICONV_ENABLED
2076
0
    else if (handler->iconv_out != NULL) {
2077
0
        ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
2078
0
    }
2079
0
#endif /* LIBXML_ICONV_ENABLED */
2080
#ifdef LIBXML_ICU_ENABLED
2081
    else if (handler->uconv_out != NULL) {
2082
        ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen,
2083
                              1);
2084
    }
2085
#endif /* LIBXML_ICU_ENABLED */
2086
0
    else {
2087
0
        *outlen = 0;
2088
0
        *inlen = 0;
2089
0
        ret = XML_ENC_ERR_INTERNAL;
2090
0
    }
2091
2092
    /* We shouldn't generate partial sequences when writing. */
2093
0
    if (ret == XML_ENC_ERR_PARTIAL)
2094
0
        ret = XML_ENC_ERR_INTERNAL;
2095
2096
0
    return(ret);
2097
0
}
2098
2099
/**
2100
 * xmlCharEncFirstLine:
2101
 * @handler:   char encoding transformation data structure
2102
 * @out:  an xmlBuffer for the output.
2103
 * @in:  an xmlBuffer for the input
2104
 *
2105
 * DEPERECATED: Don't use.
2106
 */
2107
int
2108
xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2109
0
                    xmlBufferPtr in) {
2110
0
    return(xmlCharEncInFunc(handler, out, in));
2111
0
}
2112
2113
/**
2114
 * xmlCharEncInput:
2115
 * @input: a parser input buffer
2116
 * @flush: try to flush all the raw buffer
2117
 *
2118
 * Generic front-end for the encoding handler on parser input
2119
 *
2120
 * Returns the number of bytes written or an XML_ENC_ERR code.
2121
 */
2122
int
2123
xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
2124
8.00k
{
2125
8.00k
    int ret;
2126
8.00k
    size_t written;
2127
8.00k
    size_t toconv;
2128
8.00k
    int c_in;
2129
8.00k
    int c_out;
2130
8.00k
    xmlBufPtr in;
2131
8.00k
    xmlBufPtr out;
2132
2133
8.00k
    if ((input == NULL) || (input->encoder == NULL) ||
2134
8.00k
        (input->buffer == NULL) || (input->raw == NULL))
2135
0
        return(XML_ENC_ERR_INTERNAL);
2136
8.00k
    out = input->buffer;
2137
8.00k
    in = input->raw;
2138
2139
8.00k
    toconv = xmlBufUse(in);
2140
8.00k
    if (toconv == 0)
2141
5.64k
        return (0);
2142
2.35k
    if ((toconv > 64 * 1024) && (flush == 0))
2143
0
        toconv = 64 * 1024;
2144
2.35k
    written = xmlBufAvail(out);
2145
2.35k
    if (toconv * 2 >= written) {
2146
172
        if (xmlBufGrow(out, toconv * 2) < 0)
2147
0
            return(XML_ENC_ERR_MEMORY);
2148
172
        written = xmlBufAvail(out);
2149
172
    }
2150
2.35k
    if ((written > 128 * 1024) && (flush == 0))
2151
0
        written = 128 * 1024;
2152
2153
2.35k
    c_in = toconv;
2154
2.35k
    c_out = written;
2155
2.35k
    ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2156
2.35k
                           xmlBufContent(in), &c_in, flush);
2157
2.35k
    xmlBufShrink(in, c_in);
2158
2.35k
    xmlBufAddLen(out, c_out);
2159
2160
2.35k
    if ((c_out == 0) && (ret != 0)) {
2161
19
        if (input->error == 0)
2162
19
            input->error = xmlEncConvertError(ret);
2163
19
        return(ret);
2164
19
    }
2165
2166
2.33k
    return (c_out);
2167
2.35k
}
2168
2169
/**
2170
 * xmlCharEncInFunc:
2171
 * @handler:  char encoding transformation data structure
2172
 * @out:  an xmlBuffer for the output.
2173
 * @in:  an xmlBuffer for the input
2174
 *
2175
 * Generic front-end for the encoding handler input function
2176
 *
2177
 * Returns the number of bytes written or an XML_ENC_ERR code.
2178
 */
2179
int
2180
xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2181
                 xmlBufferPtr in)
2182
0
{
2183
0
    int ret;
2184
0
    int written;
2185
0
    int toconv;
2186
2187
0
    if (handler == NULL)
2188
0
        return(XML_ENC_ERR_INTERNAL);
2189
0
    if (out == NULL)
2190
0
        return(XML_ENC_ERR_INTERNAL);
2191
0
    if (in == NULL)
2192
0
        return(XML_ENC_ERR_INTERNAL);
2193
2194
0
    toconv = in->use;
2195
0
    if (toconv == 0)
2196
0
        return (0);
2197
0
    written = out->size - out->use -1; /* count '\0' */
2198
0
    if (toconv * 2 >= written) {
2199
0
        xmlBufferGrow(out, out->size + toconv * 2);
2200
0
        written = out->size - out->use - 1;
2201
0
    }
2202
0
    ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2203
0
                           in->content, &toconv, 1);
2204
0
    xmlBufferShrink(in, toconv);
2205
0
    out->use += written;
2206
0
    out->content[out->use] = 0;
2207
2208
0
    return (written? written : ret);
2209
0
}
2210
2211
#ifdef LIBXML_OUTPUT_ENABLED
2212
/**
2213
 * xmlCharEncOutput:
2214
 * @output: a parser output buffer
2215
 * @init: is this an initialization call without data
2216
 *
2217
 * Generic front-end for the encoding handler on parser output
2218
 * a first call with @init == 1 has to be made first to initiate the
2219
 * output in case of non-stateless encoding needing to initiate their
2220
 * state or the output (like the BOM in UTF16).
2221
 * In case of UTF8 sequence conversion errors for the given encoder,
2222
 * the content will be automatically remapped to a CharRef sequence.
2223
 *
2224
 * Returns the number of bytes written or an XML_ENC_ERR code.
2225
 */
2226
int
2227
xmlCharEncOutput(xmlOutputBufferPtr output, int init)
2228
0
{
2229
0
    int ret;
2230
0
    size_t written;
2231
0
    int writtentot = 0;
2232
0
    size_t toconv;
2233
0
    int c_in;
2234
0
    int c_out;
2235
0
    xmlBufPtr in;
2236
0
    xmlBufPtr out;
2237
2238
0
    if ((output == NULL) || (output->encoder == NULL) ||
2239
0
        (output->buffer == NULL) || (output->conv == NULL))
2240
0
        return(XML_ENC_ERR_INTERNAL);
2241
0
    out = output->conv;
2242
0
    in = output->buffer;
2243
2244
0
retry:
2245
2246
0
    written = xmlBufAvail(out);
2247
2248
    /*
2249
     * First specific handling of the initialization call
2250
     */
2251
0
    if (init) {
2252
0
        c_in = 0;
2253
0
        c_out = written;
2254
        /* TODO: Check return value. */
2255
0
        xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2256
0
                          NULL, &c_in);
2257
0
        xmlBufAddLen(out, c_out);
2258
#ifdef DEBUG_ENCODING
2259
  xmlGenericError(xmlGenericErrorContext,
2260
    "initialized encoder\n");
2261
#endif
2262
0
        return(c_out);
2263
0
    }
2264
2265
    /*
2266
     * Conversion itself.
2267
     */
2268
0
    toconv = xmlBufUse(in);
2269
0
    if (toconv == 0)
2270
0
        return (writtentot);
2271
0
    if (toconv > 64 * 1024)
2272
0
        toconv = 64 * 1024;
2273
0
    if (toconv * 4 >= written) {
2274
0
        xmlBufGrow(out, toconv * 4);
2275
0
        written = xmlBufAvail(out);
2276
0
    }
2277
0
    if (written > 256 * 1024)
2278
0
        written = 256 * 1024;
2279
2280
0
    c_in = toconv;
2281
0
    c_out = written;
2282
0
    ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2283
0
                            xmlBufContent(in), &c_in);
2284
0
    xmlBufShrink(in, c_in);
2285
0
    xmlBufAddLen(out, c_out);
2286
0
    writtentot += c_out;
2287
2288
0
    if (ret == XML_ENC_ERR_SPACE)
2289
0
        goto retry;
2290
2291
    /*
2292
     * Attempt to handle error cases
2293
     */
2294
0
    if (ret == XML_ENC_ERR_INPUT) {
2295
0
        xmlChar charref[20];
2296
0
        int len = xmlBufUse(in);
2297
0
        xmlChar *content = xmlBufContent(in);
2298
0
        int cur, charrefLen;
2299
2300
0
        cur = xmlGetUTF8Char(content, &len);
2301
0
        if (cur <= 0)
2302
0
            goto error;
2303
2304
#ifdef DEBUG_ENCODING
2305
        xmlGenericError(xmlGenericErrorContext,
2306
                "handling output conversion error\n");
2307
        xmlGenericError(xmlGenericErrorContext,
2308
                "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2309
                content[0], content[1],
2310
                content[2], content[3]);
2311
#endif
2312
        /*
2313
         * Removes the UTF8 sequence, and replace it by a charref
2314
         * and continue the transcoding phase, hoping the error
2315
         * did not mangle the encoder state.
2316
         */
2317
0
        charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2318
0
                         "&#%d;", cur);
2319
0
        xmlBufShrink(in, len);
2320
0
        xmlBufGrow(out, charrefLen * 4);
2321
0
        c_out = xmlBufAvail(out);
2322
0
        c_in = charrefLen;
2323
0
        ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2324
0
                                charref, &c_in);
2325
0
        if ((ret < 0) || (c_in != charrefLen)) {
2326
0
            ret = XML_ENC_ERR_INTERNAL;
2327
0
            goto error;
2328
0
        }
2329
2330
0
        xmlBufAddLen(out, c_out);
2331
0
        writtentot += c_out;
2332
0
        goto retry;
2333
0
    }
2334
2335
0
error:
2336
0
    if ((writtentot <= 0) && (ret != 0)) {
2337
0
        if (output->error == 0)
2338
0
            output->error = xmlEncConvertError(ret);
2339
0
        return(ret);
2340
0
    }
2341
2342
0
    return(writtentot);
2343
0
}
2344
#endif
2345
2346
/**
2347
 * xmlCharEncOutFunc:
2348
 * @handler:  char encoding transformation data structure
2349
 * @out:  an xmlBuffer for the output.
2350
 * @in:  an xmlBuffer for the input
2351
 *
2352
 * Generic front-end for the encoding handler output function
2353
 * a first call with @in == NULL has to be made firs to initiate the
2354
 * output in case of non-stateless encoding needing to initiate their
2355
 * state or the output (like the BOM in UTF16).
2356
 * In case of UTF8 sequence conversion errors for the given encoder,
2357
 * the content will be automatically remapped to a CharRef sequence.
2358
 *
2359
 * Returns the number of bytes written or an XML_ENC_ERR code.
2360
 */
2361
int
2362
xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2363
0
                  xmlBufferPtr in) {
2364
0
    int ret;
2365
0
    int written;
2366
0
    int writtentot = 0;
2367
0
    int toconv;
2368
2369
0
    if (handler == NULL) return(XML_ENC_ERR_INTERNAL);
2370
0
    if (out == NULL) return(XML_ENC_ERR_INTERNAL);
2371
2372
0
retry:
2373
2374
0
    written = out->size - out->use;
2375
2376
0
    if (written > 0)
2377
0
  written--; /* Gennady: count '/0' */
2378
2379
    /*
2380
     * First specific handling of in = NULL, i.e. the initialization call
2381
     */
2382
0
    if (in == NULL) {
2383
0
        toconv = 0;
2384
        /* TODO: Check return value. */
2385
0
        xmlEncOutputChunk(handler, &out->content[out->use], &written,
2386
0
                          NULL, &toconv);
2387
0
        out->use += written;
2388
0
        out->content[out->use] = 0;
2389
#ifdef DEBUG_ENCODING
2390
  xmlGenericError(xmlGenericErrorContext,
2391
    "initialized encoder\n");
2392
#endif
2393
0
        return(0);
2394
0
    }
2395
2396
    /*
2397
     * Conversion itself.
2398
     */
2399
0
    toconv = in->use;
2400
0
    if (toconv == 0)
2401
0
  return(0);
2402
0
    if (toconv * 4 >= written) {
2403
0
        xmlBufferGrow(out, toconv * 4);
2404
0
  written = out->size - out->use - 1;
2405
0
    }
2406
0
    ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2407
0
                            in->content, &toconv);
2408
0
    xmlBufferShrink(in, toconv);
2409
0
    out->use += written;
2410
0
    writtentot += written;
2411
0
    out->content[out->use] = 0;
2412
2413
0
    if (ret == XML_ENC_ERR_SPACE)
2414
0
        goto retry;
2415
2416
    /*
2417
     * Attempt to handle error cases
2418
     */
2419
0
    if (ret == XML_ENC_ERR_INPUT) {
2420
0
        xmlChar charref[20];
2421
0
        int len = in->use;
2422
0
        const xmlChar *utf = (const xmlChar *) in->content;
2423
0
        int cur, charrefLen;
2424
2425
0
        cur = xmlGetUTF8Char(utf, &len);
2426
0
        if (cur <= 0)
2427
0
            return(ret);
2428
2429
#ifdef DEBUG_ENCODING
2430
        xmlGenericError(xmlGenericErrorContext,
2431
                "handling output conversion error\n");
2432
        xmlGenericError(xmlGenericErrorContext,
2433
                "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2434
                in->content[0], in->content[1],
2435
                in->content[2], in->content[3]);
2436
#endif
2437
        /*
2438
         * Removes the UTF8 sequence, and replace it by a charref
2439
         * and continue the transcoding phase, hoping the error
2440
         * did not mangle the encoder state.
2441
         */
2442
0
        charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2443
0
                         "&#%d;", cur);
2444
0
        xmlBufferShrink(in, len);
2445
0
        xmlBufferGrow(out, charrefLen * 4);
2446
0
        written = out->size - out->use - 1;
2447
0
        toconv = charrefLen;
2448
0
        ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2449
0
                                charref, &toconv);
2450
0
        if ((ret < 0) || (toconv != charrefLen))
2451
0
            return(XML_ENC_ERR_INTERNAL);
2452
2453
0
        out->use += written;
2454
0
        writtentot += written;
2455
0
        out->content[out->use] = 0;
2456
0
        goto retry;
2457
0
    }
2458
0
    return(writtentot ? writtentot : ret);
2459
0
}
2460
2461
/**
2462
 * xmlCharEncCloseFunc:
2463
 * @handler:  char encoding transformation data structure
2464
 *
2465
 * Generic front-end for encoding handler close function
2466
 *
2467
 * Returns 0 if success, or -1 in case of error
2468
 */
2469
int
2470
139
xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2471
139
    int ret = 0;
2472
139
    int tofree = 0;
2473
139
    int i = 0;
2474
2475
139
    if (handler == NULL) return(-1);
2476
2477
766
    for (i = 0; i < (int) NUM_DEFAULT_HANDLERS; i++) {
2478
717
        if (handler == &defaultHandlers[i])
2479
90
            return(0);
2480
717
    }
2481
2482
49
    if (handlers != NULL) {
2483
0
        for (i = 0;i < nbCharEncodingHandler; i++) {
2484
0
            if (handler == handlers[i])
2485
0
                return(0);
2486
0
  }
2487
0
    }
2488
49
#ifdef LIBXML_ICONV_ENABLED
2489
    /*
2490
     * Iconv handlers can be used only once, free the whole block.
2491
     * and the associated icon resources.
2492
     */
2493
49
    if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
2494
49
        tofree = 1;
2495
49
  if (handler->iconv_out != NULL) {
2496
49
      if (iconv_close(handler->iconv_out))
2497
0
    ret = -1;
2498
49
      handler->iconv_out = NULL;
2499
49
  }
2500
49
  if (handler->iconv_in != NULL) {
2501
49
      if (iconv_close(handler->iconv_in))
2502
0
    ret = -1;
2503
49
      handler->iconv_in = NULL;
2504
49
  }
2505
49
    }
2506
49
#endif /* LIBXML_ICONV_ENABLED */
2507
#ifdef LIBXML_ICU_ENABLED
2508
    if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) {
2509
        tofree = 1;
2510
  if (handler->uconv_out != NULL) {
2511
      closeIcuConverter(handler->uconv_out);
2512
      handler->uconv_out = NULL;
2513
  }
2514
  if (handler->uconv_in != NULL) {
2515
      closeIcuConverter(handler->uconv_in);
2516
      handler->uconv_in = NULL;
2517
  }
2518
    }
2519
#endif
2520
49
    if (tofree) {
2521
        /* free up only dynamic handlers iconv/uconv */
2522
49
        if (handler->name != NULL)
2523
49
            xmlFree(handler->name);
2524
49
        handler->name = NULL;
2525
49
        xmlFree(handler);
2526
49
    }
2527
#ifdef DEBUG_ENCODING
2528
    if (ret)
2529
        xmlGenericError(xmlGenericErrorContext,
2530
    "failed to close the encoding handler\n");
2531
    else
2532
        xmlGenericError(xmlGenericErrorContext,
2533
    "closed the encoding handler\n");
2534
#endif
2535
2536
49
    return(ret);
2537
49
}
2538
2539
/**
2540
 * xmlByteConsumed:
2541
 * @ctxt: an XML parser context
2542
 *
2543
 * This function provides the current index of the parser relative
2544
 * to the start of the current entity. This function is computed in
2545
 * bytes from the beginning starting at zero and finishing at the
2546
 * size in byte of the file if parsing a file. The function is
2547
 * of constant cost if the input is UTF-8 but can be costly if run
2548
 * on non-UTF-8 input.
2549
 *
2550
 * Returns the index in bytes from the beginning of the entity or -1
2551
 *         in case the index could not be computed.
2552
 */
2553
long
2554
0
xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2555
0
    xmlParserInputPtr in;
2556
2557
0
    if (ctxt == NULL) return(-1);
2558
0
    in = ctxt->input;
2559
0
    if (in == NULL)  return(-1);
2560
0
    if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2561
0
        unsigned int unused = 0;
2562
0
  xmlCharEncodingHandler * handler = in->buf->encoder;
2563
        /*
2564
   * Encoding conversion, compute the number of unused original
2565
   * bytes from the input not consumed and subtract that from
2566
   * the raw consumed value, this is not a cheap operation
2567
   */
2568
0
        if (in->end - in->cur > 0) {
2569
0
      unsigned char convbuf[32000];
2570
0
      const unsigned char *cur = (const unsigned char *)in->cur;
2571
0
      int toconv = in->end - in->cur, written = 32000;
2572
2573
0
      int ret;
2574
2575
0
            do {
2576
0
                toconv = in->end - cur;
2577
0
                written = 32000;
2578
0
                ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2579
0
                                        cur, &toconv);
2580
0
                if ((ret != XML_ENC_ERR_SUCCESS) && (ret != XML_ENC_ERR_SPACE))
2581
0
                    return(-1);
2582
0
                unused += written;
2583
0
                cur += toconv;
2584
0
            } while (ret == XML_ENC_ERR_SPACE);
2585
0
  }
2586
0
  if (in->buf->rawconsumed < unused)
2587
0
      return(-1);
2588
0
  return(in->buf->rawconsumed - unused);
2589
0
    }
2590
0
    return(in->consumed + (in->cur - in->base));
2591
0
}
2592
2593
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2594
#ifdef LIBXML_ISO8859X_ENABLED
2595
2596
/**
2597
 * UTF8ToISO8859x:
2598
 * @out:  a pointer to an array of bytes to store the result
2599
 * @outlen:  the length of @out
2600
 * @in:  a pointer to an array of UTF-8 chars
2601
 * @inlen:  the length of @in
2602
 * @xlattable: the 2-level transcoding table
2603
 *
2604
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
2605
 * block of chars out.
2606
 *
2607
 * Returns the number of bytes written or an XML_ENC_ERR code.
2608
 *
2609
 * The value of @inlen after return is the number of octets consumed
2610
 * as the return value is positive, else unpredictable.
2611
 * The value of @outlen after return is the number of octets consumed.
2612
 */
2613
static int
2614
UTF8ToISO8859x(unsigned char* out, int *outlen,
2615
              const unsigned char* in, int *inlen,
2616
              const unsigned char* const xlattable) {
2617
    const unsigned char* outstart = out;
2618
    const unsigned char* inend;
2619
    const unsigned char* instart = in;
2620
    const unsigned char* processed = in;
2621
2622
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
2623
        (xlattable == NULL))
2624
  return(XML_ENC_ERR_INTERNAL);
2625
    if (in == NULL) {
2626
        /*
2627
        * initialization nothing to do
2628
        */
2629
        *outlen = 0;
2630
        *inlen = 0;
2631
        return(0);
2632
    }
2633
    inend = in + (*inlen);
2634
    while (in < inend) {
2635
        unsigned char d = *in++;
2636
        if  (d < 0x80)  {
2637
            *out++ = d;
2638
        } else if (d < 0xC0) {
2639
            /* trailing byte in leading position */
2640
            *outlen = out - outstart;
2641
            *inlen = processed - instart;
2642
            return(XML_ENC_ERR_INPUT);
2643
        } else if (d < 0xE0) {
2644
            unsigned char c;
2645
            if (!(in < inend)) {
2646
                /* trailing byte not in input buffer */
2647
                *outlen = out - outstart;
2648
                *inlen = processed - instart;
2649
                return(XML_ENC_ERR_PARTIAL);
2650
            }
2651
            c = *in++;
2652
            if ((c & 0xC0) != 0x80) {
2653
                /* not a trailing byte */
2654
                *outlen = out - outstart;
2655
                *inlen = processed - instart;
2656
                return(XML_ENC_ERR_INPUT);
2657
            }
2658
            c = c & 0x3F;
2659
            d = d & 0x1F;
2660
            d = xlattable [48 + c + xlattable [d] * 64];
2661
            if (d == 0) {
2662
                /* not in character set */
2663
                *outlen = out - outstart;
2664
                *inlen = processed - instart;
2665
                return(XML_ENC_ERR_INPUT);
2666
            }
2667
            *out++ = d;
2668
        } else if (d < 0xF0) {
2669
            unsigned char c1;
2670
            unsigned char c2;
2671
            if (!(in < inend - 1)) {
2672
                /* trailing bytes not in input buffer */
2673
                *outlen = out - outstart;
2674
                *inlen = processed - instart;
2675
                return(XML_ENC_ERR_PARTIAL);
2676
            }
2677
            c1 = *in++;
2678
            if ((c1 & 0xC0) != 0x80) {
2679
                /* not a trailing byte (c1) */
2680
                *outlen = out - outstart;
2681
                *inlen = processed - instart;
2682
                return(XML_ENC_ERR_INPUT);
2683
            }
2684
            c2 = *in++;
2685
            if ((c2 & 0xC0) != 0x80) {
2686
                /* not a trailing byte (c2) */
2687
                *outlen = out - outstart;
2688
                *inlen = processed - instart;
2689
                return(XML_ENC_ERR_INPUT);
2690
            }
2691
            c1 = c1 & 0x3F;
2692
            c2 = c2 & 0x3F;
2693
      d = d & 0x0F;
2694
      d = xlattable [48 + c2 + xlattable [48 + c1 +
2695
      xlattable [32 + d] * 64] * 64];
2696
            if (d == 0) {
2697
                /* not in character set */
2698
                *outlen = out - outstart;
2699
                *inlen = processed - instart;
2700
                return(XML_ENC_ERR_INPUT);
2701
            }
2702
            *out++ = d;
2703
        } else {
2704
            /* cannot transcode >= U+010000 */
2705
            *outlen = out - outstart;
2706
            *inlen = processed - instart;
2707
            return(XML_ENC_ERR_INPUT);
2708
        }
2709
        processed = in;
2710
    }
2711
    *outlen = out - outstart;
2712
    *inlen = processed - instart;
2713
    return(*outlen);
2714
}
2715
2716
/**
2717
 * ISO8859xToUTF8
2718
 * @out:  a pointer to an array of bytes to store the result
2719
 * @outlen:  the length of @out
2720
 * @in:  a pointer to an array of ISO Latin 1 chars
2721
 * @inlen:  the length of @in
2722
 *
2723
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
2724
 * block of chars out.
2725
 *
2726
 * Returns the number of bytes written or an XML_ENC_ERR code.
2727
 *
2728
 * The value of @inlen after return is the number of octets consumed
2729
 * The value of @outlen after return is the number of octets produced.
2730
 */
2731
static int
2732
ISO8859xToUTF8(unsigned char* out, int *outlen,
2733
              const unsigned char* in, int *inlen,
2734
              unsigned short const *unicodetable) {
2735
    unsigned char* outstart = out;
2736
    unsigned char* outend;
2737
    const unsigned char* instart = in;
2738
    const unsigned char* inend;
2739
    const unsigned char* instop;
2740
    unsigned int c;
2741
2742
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
2743
        (in == NULL) || (unicodetable == NULL))
2744
  return(XML_ENC_ERR_INTERNAL);
2745
    outend = out + *outlen;
2746
    inend = in + *inlen;
2747
    instop = inend;
2748
2749
    while ((in < inend) && (out < outend - 2)) {
2750
        if (*in >= 0x80) {
2751
            c = unicodetable [*in - 0x80];
2752
            if (c == 0) {
2753
                /* undefined code point */
2754
                *outlen = out - outstart;
2755
                *inlen = in - instart;
2756
                return(XML_ENC_ERR_INPUT);
2757
            }
2758
            if (c < 0x800) {
2759
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
2760
                *out++ = (c & 0x3F) | 0x80;
2761
            } else {
2762
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
2763
                *out++ = ((c >>  6) & 0x3F) | 0x80;
2764
                *out++ = (c & 0x3F) | 0x80;
2765
            }
2766
            ++in;
2767
        }
2768
        if (instop - in > outend - out) instop = in + (outend - out);
2769
        while ((*in < 0x80) && (in < instop)) {
2770
            *out++ = *in++;
2771
        }
2772
    }
2773
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
2774
        *out++ =  *in++;
2775
    }
2776
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
2777
        *out++ =  *in++;
2778
    }
2779
    *outlen = out - outstart;
2780
    *inlen = in - instart;
2781
    return (*outlen);
2782
}
2783
2784
2785
/************************************************************************
2786
 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
2787
 ************************************************************************/
2788
2789
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
2790
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2791
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2792
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2793
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2794
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
2795
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
2796
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
2797
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
2798
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
2799
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
2800
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
2801
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
2802
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
2803
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
2804
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
2805
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
2806
};
2807
2808
static const unsigned char xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
2809
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
2810
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2811
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2812
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2813
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2814
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2815
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2816
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2817
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2818
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
2819
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
2820
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
2821
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
2822
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2823
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
2824
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
2825
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
2826
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2827
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2828
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
2829
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
2830
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
2831
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
2832
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
2833
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
2834
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
2835
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
2836
};
2837
2838
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
2839
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2840
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2841
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2842
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2843
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
2844
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
2845
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
2846
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
2847
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
2848
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
2849
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
2850
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
2851
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
2852
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
2853
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
2854
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
2855
};
2856
2857
static const unsigned char xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
2858
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
2859
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2860
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2861
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2862
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2863
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2864
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2865
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2866
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2867
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
2868
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
2869
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
2870
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
2871
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
2872
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2873
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2874
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
2875
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2876
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2877
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2878
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2879
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2880
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2881
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2882
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
2883
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
2884
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
2885
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2886
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
2887
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2888
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
2889
};
2890
2891
static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
2892
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2893
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2894
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2895
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2896
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
2897
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
2898
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
2899
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
2900
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
2901
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
2902
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
2903
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
2904
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
2905
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
2906
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
2907
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
2908
};
2909
2910
static const unsigned char xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
2911
    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
2912
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2913
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2914
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2915
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2916
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2917
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2918
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2919
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2920
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
2921
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
2922
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
2923
    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
2924
    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
2925
    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
2926
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
2927
    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
2928
    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
2929
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
2930
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
2931
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
2932
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2933
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2934
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
2935
    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
2936
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
2937
    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
2938
};
2939
2940
static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
2941
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2942
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2943
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2944
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2945
    0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
2946
    0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
2947
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
2948
    0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
2949
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
2950
    0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
2951
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
2952
    0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
2953
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
2954
    0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
2955
    0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
2956
    0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
2957
};
2958
2959
static const unsigned char xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
2960
    "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2961
    "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2962
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2963
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2964
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2965
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2966
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2967
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2968
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2969
    "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
2970
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2971
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
2972
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
2973
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2974
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
2975
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2976
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
2977
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2978
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2979
    "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2980
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2981
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2982
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2983
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2984
    "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2985
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2986
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2987
};
2988
2989
static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
2990
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2991
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2992
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2993
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2994
    0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
2995
    0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
2996
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2997
    0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
2998
    0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
2999
    0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3000
    0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3001
    0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3002
    0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3003
    0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3004
    0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3005
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3006
};
3007
3008
static const unsigned char xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
3009
    "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3010
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3011
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3012
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3013
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3014
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3015
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3016
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3017
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3018
    "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3019
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3020
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3021
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3022
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3023
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3024
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3025
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3026
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3027
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3028
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3029
    "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3030
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3031
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3032
};
3033
3034
static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
3035
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3036
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3037
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3038
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3039
    0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3040
    0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3041
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3042
    0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3043
    0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3044
    0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3045
    0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3046
    0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3047
    0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3048
    0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3049
    0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3050
    0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3051
};
3052
3053
static const unsigned char xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3054
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3055
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3056
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3057
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3058
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3059
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3060
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3061
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3062
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3063
    "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3064
    "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3065
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3066
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3067
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3068
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3069
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3070
    "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3071
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3072
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3073
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3074
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3075
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3076
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3077
    "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3078
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3079
    "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3080
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3081
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3082
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3083
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3084
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3085
};
3086
3087
static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
3088
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3089
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3090
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3091
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3092
    0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3093
    0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3094
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3095
    0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3096
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3097
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3098
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3099
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3100
    0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3101
    0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3102
    0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3103
    0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3104
};
3105
3106
static const unsigned char xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3107
    "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3108
    "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3109
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3110
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3111
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3112
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3113
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3114
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3115
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3116
    "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3117
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3118
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3119
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3120
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3121
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3122
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3123
    "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3124
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3125
    "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3126
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3127
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3128
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3129
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3130
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3131
    "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3132
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3133
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3134
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3135
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3136
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3137
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3138
};
3139
3140
static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
3141
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3142
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3143
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3144
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3145
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3146
    0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3147
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3148
    0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3149
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3150
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3151
    0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3152
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3153
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3154
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3155
    0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3156
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3157
};
3158
3159
/*
 * xmltranscodetable_ISO8859_9:
 *
 * Byte lookup table handed to UTF8ToISO8859x() by UTF8ToISO8859_9().
 * The initializer is 23 string fragments of 16 bytes each (368 bytes),
 * which is exactly 48 + 5 * 64, so the array is filled completely and
 * no terminating NUL is stored.
 * NOTE(review): the size expression suggests a 48-byte index followed
 * by five 64-byte blocks -- confirm the lookup scheme in
 * UTF8ToISO8859x().
 */
static const unsigned char xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3160
    "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3161
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3162
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3163
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3164
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3165
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3166
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3167
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3168
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3169
    "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3170
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3171
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3172
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3173
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3174
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3175
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3176
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3177
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3178
    "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3179
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3180
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3181
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3182
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3183
};
3184
3185
/*
 * xmlunicodetable_ISO8859_10:
 *
 * 128 16-bit values handed to ISO8859xToUTF8() by ISO8859_10ToUTF8().
 * The first 32 entries are 0x0080..0x009f in order; later entries mix
 * values in the 0x00xx and 0x01xx ranges plus a single 0x2015.
 * NOTE(review): presumably entry i is the Unicode code point for the
 * ISO-8859-10 byte 0x80 + i -- confirm against ISO8859xToUTF8().
 */
static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3186
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3187
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3188
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3189
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3190
    0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3191
    0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3192
    0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3193
    0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3194
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3195
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3196
    0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3197
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3198
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3199
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3200
    0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3201
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3202
};
3203
3204
/*
 * xmltranscodetable_ISO8859_10:
 *
 * Byte lookup table handed to UTF8ToISO8859x() by UTF8ToISO8859_10().
 * The initializer is 31 string fragments of 16 bytes each (496 bytes),
 * which is exactly 48 + 7 * 64, so the array is filled completely and
 * no terminating NUL is stored.
 * NOTE(review): the size expression suggests a 48-byte index followed
 * by seven 64-byte blocks -- confirm the lookup scheme in
 * UTF8ToISO8859x().
 */
static const unsigned char xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3205
    "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3206
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3207
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3208
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3209
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3210
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3211
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3212
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3213
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3214
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3215
    "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3216
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3217
    "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3218
    "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3219
    "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3220
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3221
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3222
    "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3223
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3224
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3225
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3226
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3227
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3228
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3229
    "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3230
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3231
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3232
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3233
    "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3234
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3235
    "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3236
};
3237
3238
/*
 * xmlunicodetable_ISO8859_11:
 *
 * 128 16-bit values handed to ISO8859xToUTF8() by ISO8859_11ToUTF8().
 * The first 33 entries are 0x0080..0x00a0 in order; the rest lie in the
 * 0x0e01..0x0e5b range, except for eight entries that are 0x0000.
 * NOTE(review): presumably entry i is the Unicode code point for the
 * ISO-8859-11 byte 0x80 + i, and 0x0000 marks bytes with no mapping --
 * confirm how ISO8859xToUTF8() treats a zero entry.
 */
static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3239
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3240
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3241
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3242
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3243
    0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3244
    0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3245
    0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3246
    0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3247
    0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3248
    0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3249
    0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3250
    0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3251
    0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3252
    0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3253
    0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3254
    0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3255
};
3256
3257
/*
 * xmltranscodetable_ISO8859_11:
 *
 * Byte lookup table handed to UTF8ToISO8859x() by UTF8ToISO8859_11().
 * The initializer is 27 string fragments of 16 bytes each (432 bytes),
 * which is exactly 48 + 6 * 64, so the array is filled completely and
 * no terminating NUL is stored.
 * NOTE(review): the size expression suggests a 48-byte index followed
 * by six 64-byte blocks -- confirm the lookup scheme in
 * UTF8ToISO8859x().
 */
static const unsigned char xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3258
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3259
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3260
    "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3261
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3262
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3263
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3264
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3265
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3266
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3267
    "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3268
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3269
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3270
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3271
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3272
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3273
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3274
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3275
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3276
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3277
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3278
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3279
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3280
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3281
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3282
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3283
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3284
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3285
};
3286
3287
/*
 * xmlunicodetable_ISO8859_13:
 *
 * 128 16-bit values handed to ISO8859xToUTF8() by ISO8859_13ToUTF8().
 * The first 32 entries are 0x0080..0x009f in order; later entries mix
 * values in the 0x00xx and 0x01xx ranges with 0x2019, 0x201c, 0x201d
 * and 0x201e.
 * NOTE(review): presumably entry i is the Unicode code point for the
 * ISO-8859-13 byte 0x80 + i -- confirm against ISO8859xToUTF8().
 */
static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3288
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3289
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3290
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3291
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3292
    0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3293
    0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3294
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3295
    0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3296
    0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3297
    0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3298
    0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3299
    0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3300
    0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3301
    0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3302
    0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3303
    0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3304
};
3305
3306
/*
 * xmltranscodetable_ISO8859_13:
 *
 * Byte lookup table handed to UTF8ToISO8859x() by UTF8ToISO8859_13().
 * The initializer is 31 string fragments of 16 bytes each (496 bytes),
 * which is exactly 48 + 7 * 64, so the array is filled completely and
 * no terminating NUL is stored.
 * NOTE(review): the size expression suggests a 48-byte index followed
 * by seven 64-byte blocks -- confirm the lookup scheme in
 * UTF8ToISO8859x().
 */
static const unsigned char xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3307
    "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3308
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3309
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3310
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3311
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3312
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3313
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3314
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3315
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3316
    "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3317
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3318
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3319
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3320
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3321
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3322
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3323
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3324
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3325
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3326
    "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3327
    "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3328
    "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3329
    "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3330
    "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3331
    "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3332
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3333
    "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3334
    "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3335
    "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3336
    "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3337
    "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3338
};
3339
3340
/*
 * xmlunicodetable_ISO8859_14:
 *
 * 128 16-bit values handed to ISO8859xToUTF8() by ISO8859_14ToUTF8().
 * The first 32 entries are 0x0080..0x009f in order; later entries mix
 * values in the 0x00xx, 0x01xx and 0x1exx ranges.
 * NOTE(review): presumably entry i is the Unicode code point for the
 * ISO-8859-14 byte 0x80 + i -- confirm against ISO8859xToUTF8().
 */
static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
3341
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3342
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3343
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3344
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3345
    0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3346
    0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3347
    0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3348
    0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3349
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3350
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3351
    0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3352
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3353
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3354
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3355
    0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3356
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
3357
};
3358
3359
/*
 * xmltranscodetable_ISO8859_14:
 *
 * Byte lookup table handed to UTF8ToISO8859x() by UTF8ToISO8859_14().
 * The initializer is 43 string fragments of 16 bytes each (688 bytes),
 * which is exactly 48 + 10 * 64, so the array is filled completely and
 * no terminating NUL is stored.
 * NOTE(review): the size expression suggests a 48-byte index followed
 * by ten 64-byte blocks -- confirm the lookup scheme in
 * UTF8ToISO8859x().
 */
static const unsigned char xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
3360
    "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3361
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3362
    "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3363
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3364
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3365
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3366
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3367
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3368
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3369
    "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3370
    "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3371
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3372
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3373
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3374
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3375
    "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3376
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3377
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3378
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3379
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3380
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3381
    "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3382
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3383
    "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3384
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3385
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3386
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3387
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3388
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3389
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3390
    "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3391
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3392
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3393
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3394
    "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3395
    "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3396
    "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3397
    "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3398
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3399
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3400
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3401
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3402
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3403
};
3404
3405
/*
 * xmlunicodetable_ISO8859_15:
 *
 * 128 16-bit values handed to ISO8859xToUTF8() by ISO8859_15ToUTF8().
 * Almost every entry equals 0x80 plus its index; the exceptions are
 * 0x20ac, 0x0160, 0x0161, 0x017d, 0x017e, 0x0152, 0x0153 and 0x0178.
 * NOTE(review): presumably entry i is the Unicode code point for the
 * ISO-8859-15 byte 0x80 + i -- confirm against ISO8859xToUTF8().
 */
static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
3406
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3407
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3408
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3409
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3410
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3411
    0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3412
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3413
    0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3414
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3415
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3416
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3417
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3418
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3419
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3420
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3421
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
3422
};
3423
3424
/*
 * xmltranscodetable_ISO8859_15:
 *
 * Byte lookup table handed to UTF8ToISO8859x() by UTF8ToISO8859_15().
 * The initializer is 27 string fragments of 16 bytes each (432 bytes),
 * which is exactly 48 + 6 * 64, so the array is filled completely and
 * no terminating NUL is stored.
 * NOTE(review): the size expression suggests a 48-byte index followed
 * by six 64-byte blocks -- confirm the lookup scheme in
 * UTF8ToISO8859x().
 */
static const unsigned char xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
3425
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3426
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3427
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3428
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3429
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3430
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3431
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3432
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3433
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3434
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3435
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3436
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3437
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3438
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3439
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3440
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3441
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3442
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3443
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3444
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3445
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3446
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3447
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3448
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3449
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3450
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3451
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
3452
};
3453
3454
/*
 * xmlunicodetable_ISO8859_16:
 *
 * 128 16-bit values handed to ISO8859xToUTF8() by ISO8859_16ToUTF8().
 * The first 32 entries are 0x0080..0x009f in order; later entries mix
 * values in the 0x00xx, 0x01xx and 0x02xx ranges with 0x20ac, 0x201d
 * and 0x201e.
 * NOTE(review): presumably entry i is the Unicode code point for the
 * ISO-8859-16 byte 0x80 + i -- confirm against ISO8859xToUTF8().
 */
static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
3455
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3456
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3457
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3458
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3459
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3460
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3461
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3462
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3463
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3464
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3465
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3466
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3467
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3468
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3469
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3470
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
3471
};
3472
3473
/*
 * xmltranscodetable_ISO8859_16:
 *
 * Byte lookup table handed to UTF8ToISO8859x() by UTF8ToISO8859_16().
 * The initializer is 39 string fragments of 16 bytes each (624 bytes),
 * which is exactly 48 + 9 * 64, so the array is filled completely and
 * no terminating NUL is stored.
 * NOTE(review): the size expression suggests a 48-byte index followed
 * by nine 64-byte blocks -- confirm the lookup scheme in
 * UTF8ToISO8859x().
 */
static const unsigned char xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
3474
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3475
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3476
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3477
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3478
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3479
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3480
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3481
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3482
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3483
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3484
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3485
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3486
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3487
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3488
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3489
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3490
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3491
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3492
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3493
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3494
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3495
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3496
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3497
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3498
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3499
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3500
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3501
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3502
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3503
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3504
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3505
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3506
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3507
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3508
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3509
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3510
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3511
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3512
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3513
};
3514
3515
3516
/*
3517
 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3518
 */
3519
3520
static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3521
    const unsigned char* in, int *inlen) {
3522
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3523
}
3524
static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3525
    const unsigned char* in, int *inlen) {
3526
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3527
}
3528
3529
static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3530
    const unsigned char* in, int *inlen) {
3531
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3532
}
3533
static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3534
    const unsigned char* in, int *inlen) {
3535
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3536
}
3537
3538
static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3539
    const unsigned char* in, int *inlen) {
3540
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3541
}
3542
static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3543
    const unsigned char* in, int *inlen) {
3544
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3545
}
3546
3547
static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3548
    const unsigned char* in, int *inlen) {
3549
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3550
}
3551
static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3552
    const unsigned char* in, int *inlen) {
3553
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3554
}
3555
3556
static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3557
    const unsigned char* in, int *inlen) {
3558
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3559
}
3560
static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3561
    const unsigned char* in, int *inlen) {
3562
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3563
}
3564
3565
static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3566
    const unsigned char* in, int *inlen) {
3567
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3568
}
3569
static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3570
    const unsigned char* in, int *inlen) {
3571
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3572
}
3573
3574
static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3575
    const unsigned char* in, int *inlen) {
3576
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3577
}
3578
static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3579
    const unsigned char* in, int *inlen) {
3580
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3581
}
3582
3583
static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3584
    const unsigned char* in, int *inlen) {
3585
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3586
}
3587
static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3588
    const unsigned char* in, int *inlen) {
3589
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3590
}
3591
3592
static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3593
    const unsigned char* in, int *inlen) {
3594
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3595
}
3596
static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3597
    const unsigned char* in, int *inlen) {
3598
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3599
}
3600
3601
static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3602
    const unsigned char* in, int *inlen) {
3603
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3604
}
3605
static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3606
    const unsigned char* in, int *inlen) {
3607
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3608
}
3609
3610
static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3611
    const unsigned char* in, int *inlen) {
3612
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3613
}
3614
static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3615
    const unsigned char* in, int *inlen) {
3616
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3617
}
3618
3619
static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3620
    const unsigned char* in, int *inlen) {
3621
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3622
}
3623
static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3624
    const unsigned char* in, int *inlen) {
3625
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3626
}
3627
3628
static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3629
    const unsigned char* in, int *inlen) {
3630
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3631
}
3632
static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3633
    const unsigned char* in, int *inlen) {
3634
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3635
}
3636
3637
static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3638
    const unsigned char* in, int *inlen) {
3639
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3640
}
3641
static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3642
    const unsigned char* in, int *inlen) {
3643
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3644
}
3645
3646
#endif
3647
#endif
3648