Coverage Report

Created: 2025-07-11 06:46

/src/tinysparql/subprojects/libxml2-2.13.1/encoding.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * encoding.c : implements the encoding conversion functions needed for XML
3
 *
4
 * Related specs:
5
 * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6
 * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7
 * [ISO-10646]    UTF-8 and UTF-16 in Annexes
8
 * [ISO-8859-1]   ISO Latin-1 character codes.
9
 * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
10
 *                Worldwide Character Encoding -- Version 1.0", Addison-
11
 *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
12
 *                described in Unicode Technical Report #4.
13
 * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
14
 *                Information Interchange, ANSI X3.4-1986.
15
 *
16
 * See Copyright for the status of this software.
17
 *
18
 * daniel@veillard.com
19
 *
20
 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
21
 */
22
23
#define IN_LIBXML
24
#include "libxml.h"
25
26
#include <string.h>
27
#include <limits.h>
28
#include <ctype.h>
29
#include <stdlib.h>
30
31
#ifdef LIBXML_ICONV_ENABLED
32
#include <errno.h>
33
#endif
34
35
#include <libxml/encoding.h>
36
#include <libxml/xmlmemory.h>
37
#include <libxml/parser.h>
38
#ifdef LIBXML_HTML_ENABLED
39
#include <libxml/HTMLparser.h>
40
#endif
41
#include <libxml/xmlerror.h>
42
43
#include "private/buf.h"
44
#include "private/enc.h"
45
#include "private/error.h"
46
47
#ifdef LIBXML_ICU_ENABLED
48
#include <unicode/ucnv.h>
49
/* Size of pivot buffer, same as icu/source/common/ucnv.cpp CHUNK_SIZE */
50
#define ICU_PIVOT_BUF_SIZE 1024
51
/* State for one ICU-backed conversion: two converters plus a UTF-16 pivot area. */
typedef struct _uconv_t uconv_t;
52
struct _uconv_t {
53
  UConverter *uconv; /* for conversion between an encoding and UTF-16 */
54
  UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */
55
  UChar      pivot_buf[ICU_PIVOT_BUF_SIZE]; /* UTF-16 scratch storage, ICU_PIVOT_BUF_SIZE units */
56
  UChar      *pivot_source; /* NOTE(review): presumably the read position inside pivot_buf - confirm against the ICU call site */
57
  UChar      *pivot_target; /* NOTE(review): presumably the write position inside pivot_buf - confirm against the ICU call site */
58
};
59
#endif
60
61
/* One entry of the encoding alias table: maps an alias onto an encoding name. */
typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
62
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
63
struct _xmlCharEncodingAlias {
64
    const char *name; /* encoding name handed back when the alias matches */
65
    const char *alias; /* alias text; lookups compare it against an upper-cased key */
66
};
67
68
/* Dynamically grown alias table; NULL until the first alias is registered. */
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
69
/* Number of entries currently in use in xmlCharEncodingAliases. */
static int xmlCharEncodingAliasesNb = 0;
70
/* Number of entries xmlCharEncodingAliases has room for. */
static int xmlCharEncodingAliasesMax = 0;
71
72
/* Host byte-order flag: non-zero means little-endian. Defaults to 1. */
static int xmlLittleEndian = 1;
73
74
/************************************************************************
75
 *                  *
76
 *    Conversions To/From UTF8 encoding     *
77
 *                  *
78
 ************************************************************************/
79
80
/**
81
 * asciiToUTF8:
82
 * @out:  a pointer to an array of bytes to store the result
83
 * @outlen:  the length of @out
84
 * @in:  a pointer to an array of ASCII chars
85
 * @inlen:  the length of @in
86
 *
87
 * Take a block of ASCII chars in and try to convert it to an UTF-8
88
 * block of chars out.
89
 *
90
 * Returns the number of bytes written or an XML_ENC_ERR code.
91
 *
92
 * The value of @inlen after return is the number of octets consumed
93
 *     if the return value is positive, else unpredictable.
94
 * The value of @outlen after return is the number of octets produced.
95
 */
96
static int
97
asciiToUTF8(unsigned char* out, int *outlen,
98
0
              const unsigned char* in, int *inlen) {
99
0
    unsigned char* outstart = out;
100
0
    const unsigned char* base = in;
101
0
    const unsigned char* processed = in;
102
0
    unsigned char* outend = out + *outlen;
103
0
    const unsigned char* inend;
104
0
    unsigned int c;
105
106
0
    inend = in + (*inlen);
107
0
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
108
0
  c= *in++;
109
110
0
        if (out >= outend)
111
0
      break;
112
0
        if (c < 0x80) {
113
0
      *out++ = c;
114
0
  } else {
115
0
      *outlen = out - outstart;
116
0
      *inlen = processed - base;
117
0
      return(XML_ENC_ERR_INPUT);
118
0
  }
119
120
0
  processed = (const unsigned char*) in;
121
0
    }
122
0
    *outlen = out - outstart;
123
0
    *inlen = processed - base;
124
0
    return(*outlen);
125
0
}
126
127
#ifdef LIBXML_OUTPUT_ENABLED
128
/**
129
 * UTF8Toascii:
130
 * @out:  a pointer to an array of bytes to store the result
131
 * @outlen:  the length of @out
132
 * @in:  a pointer to an array of UTF-8 chars
133
 * @inlen:  the length of @in
134
 *
135
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
136
 * block of chars out.
137
 *
138
 * Returns the number of bytes written or an XML_ENC_ERR code.
139
 *
140
 * The value of @inlen after return is the number of octets consumed
141
 *     if the return value is positive, else unpredictable.
142
 * The value of @outlen after return is the number of octets produced.
143
 */
144
static int
145
UTF8Toascii(unsigned char* out, int *outlen,
146
0
              const unsigned char* in, int *inlen) {
147
0
    const unsigned char* processed = in;
148
0
    const unsigned char* outend;
149
0
    const unsigned char* outstart = out;
150
0
    const unsigned char* instart = in;
151
0
    const unsigned char* inend;
152
0
    unsigned int c, d;
153
0
    int trailing;
154
155
0
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
156
0
        return(XML_ENC_ERR_INTERNAL);
157
0
    if (in == NULL) {
158
        /*
159
   * initialization nothing to do
160
   */
161
0
  *outlen = 0;
162
0
  *inlen = 0;
163
0
  return(0);
164
0
    }
165
0
    inend = in + (*inlen);
166
0
    outend = out + (*outlen);
167
0
    while (in < inend) {
168
0
  d = *in++;
169
0
  if      (d < 0x80)  { c= d; trailing= 0; }
170
0
  else if (d < 0xC0) {
171
      /* trailing byte in leading position */
172
0
      *outlen = out - outstart;
173
0
      *inlen = processed - instart;
174
0
      return(XML_ENC_ERR_INPUT);
175
0
        } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
176
0
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
177
0
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
178
0
  else {
179
      /* no chance for this in Ascii */
180
0
      *outlen = out - outstart;
181
0
      *inlen = processed - instart;
182
0
      return(XML_ENC_ERR_INPUT);
183
0
  }
184
185
0
  if (inend - in < trailing) {
186
0
      break;
187
0
  }
188
189
0
  for ( ; trailing; trailing--) {
190
0
      if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
191
0
    break;
192
0
      c <<= 6;
193
0
      c |= d & 0x3F;
194
0
  }
195
196
  /* assertion: c is a single UTF-4 value */
197
0
  if (c < 0x80) {
198
0
      if (out >= outend)
199
0
    break;
200
0
      *out++ = c;
201
0
  } else {
202
      /* no chance for this in Ascii */
203
0
      *outlen = out - outstart;
204
0
      *inlen = processed - instart;
205
0
      return(XML_ENC_ERR_INPUT);
206
0
  }
207
0
  processed = in;
208
0
    }
209
0
    *outlen = out - outstart;
210
0
    *inlen = processed - instart;
211
0
    return(*outlen);
212
0
}
213
#endif /* LIBXML_OUTPUT_ENABLED */
214
215
/**
216
 * isolat1ToUTF8:
217
 * @out:  a pointer to an array of bytes to store the result
218
 * @outlen:  the length of @out
219
 * @in:  a pointer to an array of ISO Latin 1 chars
220
 * @inlen:  the length of @in
221
 *
222
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
223
 * block of chars out.
224
 *
225
 * Returns the number of bytes written or an XML_ENC_ERR code.
226
 *
227
 * The value of @inlen after return is the number of octets consumed
228
 *     if the return value is positive, else unpredictable.
229
 * The value of @outlen after return is the number of octets produced.
230
 */
231
int
232
isolat1ToUTF8(unsigned char* out, int *outlen,
233
0
              const unsigned char* in, int *inlen) {
234
0
    unsigned char* outstart = out;
235
0
    const unsigned char* base = in;
236
0
    unsigned char* outend;
237
0
    const unsigned char* inend;
238
0
    const unsigned char* instop;
239
240
0
    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
241
0
  return(XML_ENC_ERR_INTERNAL);
242
243
0
    outend = out + *outlen;
244
0
    inend = in + (*inlen);
245
0
    instop = inend;
246
247
0
    while ((in < inend) && (out < outend - 1)) {
248
0
  if (*in >= 0x80) {
249
0
      *out++ = (((*in) >>  6) & 0x1F) | 0xC0;
250
0
            *out++ = ((*in) & 0x3F) | 0x80;
251
0
      ++in;
252
0
  }
253
0
  if ((instop - in) > (outend - out)) instop = in + (outend - out);
254
0
  while ((in < instop) && (*in < 0x80)) {
255
0
      *out++ = *in++;
256
0
  }
257
0
    }
258
0
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
259
0
        *out++ = *in++;
260
0
    }
261
0
    *outlen = out - outstart;
262
0
    *inlen = in - base;
263
0
    return(*outlen);
264
0
}
265
266
/**
267
 * UTF8ToUTF8:
268
 * @out:  a pointer to an array of bytes to store the result
269
 * @outlen:  the length of @out
270
 * @inb:  a pointer to an array of UTF-8 chars
271
 * @inlenb:  the length of @in in UTF-8 chars
272
 *
273
 * No op copy operation for UTF8 handling.
274
 *
275
 * Returns the number of bytes written or an XML_ENC_ERR code.
276
 *
277
 *     The value of *inlen after return is the number of octets consumed
278
 *     if the return value is positive, else unpredictable.
279
 */
280
static int
281
UTF8ToUTF8(unsigned char* out, int *outlen,
282
           const unsigned char* inb, int *inlenb)
283
0
{
284
0
    int len;
285
286
0
    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
287
0
  return(XML_ENC_ERR_INTERNAL);
288
0
    if (inb == NULL) {
289
        /* inb == NULL means output is initialized. */
290
0
        *outlen = 0;
291
0
        *inlenb = 0;
292
0
        return(0);
293
0
    }
294
0
    if (*outlen > *inlenb) {
295
0
  len = *inlenb;
296
0
    } else {
297
0
  len = *outlen;
298
0
    }
299
0
    if (len < 0)
300
0
  return(XML_ENC_ERR_INTERNAL);
301
302
    /*
303
     * FIXME: Conversion functions must assure valid UTF-8, so we have
304
     * to check for UTF-8 validity. Preferably, this converter shouldn't
305
     * be used at all.
306
     */
307
0
    memcpy(out, inb, len);
308
309
0
    *outlen = len;
310
0
    *inlenb = len;
311
0
    return(*outlen);
312
0
}
313
314
315
#ifdef LIBXML_OUTPUT_ENABLED
316
/**
317
 * UTF8Toisolat1:
318
 * @out:  a pointer to an array of bytes to store the result
319
 * @outlen:  the length of @out
320
 * @in:  a pointer to an array of UTF-8 chars
321
 * @inlen:  the length of @in
322
 *
323
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
324
 * block of chars out.
325
 *
326
 * Returns the number of bytes written or an XML_ENC_ERR code.
327
 *
328
 * The value of @inlen after return is the number of octets consumed
329
 *     if the return value is positive, else unpredictable.
330
 * The value of @outlen after return is the number of octets produced.
331
 */
332
int
333
UTF8Toisolat1(unsigned char* out, int *outlen,
334
0
              const unsigned char* in, int *inlen) {
335
0
    const unsigned char* processed = in;
336
0
    const unsigned char* outend;
337
0
    const unsigned char* outstart = out;
338
0
    const unsigned char* instart = in;
339
0
    const unsigned char* inend;
340
0
    unsigned int c, d;
341
0
    int trailing;
342
343
0
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
344
0
        return(XML_ENC_ERR_INTERNAL);
345
0
    if (in == NULL) {
346
        /*
347
   * initialization nothing to do
348
   */
349
0
  *outlen = 0;
350
0
  *inlen = 0;
351
0
  return(0);
352
0
    }
353
0
    inend = in + (*inlen);
354
0
    outend = out + (*outlen);
355
0
    while (in < inend) {
356
0
  d = *in++;
357
0
  if      (d < 0x80)  { c= d; trailing= 0; }
358
0
  else if (d < 0xC0) {
359
      /* trailing byte in leading position */
360
0
      *outlen = out - outstart;
361
0
      *inlen = processed - instart;
362
0
      return(XML_ENC_ERR_INPUT);
363
0
        } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
364
0
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
365
0
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
366
0
  else {
367
      /* no chance for this in IsoLat1 */
368
0
      *outlen = out - outstart;
369
0
      *inlen = processed - instart;
370
0
      return(XML_ENC_ERR_INPUT);
371
0
  }
372
373
0
  if (inend - in < trailing) {
374
0
      break;
375
0
  }
376
377
0
  for ( ; trailing; trailing--) {
378
0
      if (in >= inend)
379
0
    break;
380
0
      if (((d= *in++) & 0xC0) != 0x80) {
381
0
    *outlen = out - outstart;
382
0
    *inlen = processed - instart;
383
0
    return(XML_ENC_ERR_INPUT);
384
0
      }
385
0
      c <<= 6;
386
0
      c |= d & 0x3F;
387
0
  }
388
389
  /* assertion: c is a single UTF-4 value */
390
0
  if (c <= 0xFF) {
391
0
      if (out >= outend)
392
0
    break;
393
0
      *out++ = c;
394
0
  } else {
395
      /* no chance for this in IsoLat1 */
396
0
      *outlen = out - outstart;
397
0
      *inlen = processed - instart;
398
0
      return(XML_ENC_ERR_INPUT);
399
0
  }
400
0
  processed = in;
401
0
    }
402
0
    *outlen = out - outstart;
403
0
    *inlen = processed - instart;
404
0
    return(*outlen);
405
0
}
406
#endif /* LIBXML_OUTPUT_ENABLED */
407
408
/**
409
 * UTF16LEToUTF8:
410
 * @out:  a pointer to an array of bytes to store the result
411
 * @outlen:  the length of @out
412
 * @inb:  a pointer to an array of UTF-16LE passwd as a byte array
413
 * @inlenb:  the length of @in in UTF-16LE chars
414
 *
415
 * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8
416
 * block of chars out. This function assumes the endian property
417
 * is the same between the native type of this machine and the
418
 * inputed one.
419
 *
420
 * Returns the number of bytes written or an XML_ENC_ERR code.
421
 *
422
 * The value of *inlen after return is the number of octets consumed
423
 * if the return value is positive, else unpredictable.
424
 */
425
static int
426
UTF16LEToUTF8(unsigned char* out, int *outlen,
427
            const unsigned char* inb, int *inlenb)
428
0
{
429
0
    unsigned char* outstart = out;
430
0
    const unsigned char* processed = inb;
431
0
    unsigned char* outend;
432
0
    unsigned short* in = (unsigned short *) (void *) inb;
433
0
    unsigned short* inend;
434
0
    unsigned int c, d, inlen;
435
0
    unsigned char *tmp;
436
0
    int bits;
437
438
0
    if (*outlen == 0) {
439
0
        *inlenb = 0;
440
0
        return(0);
441
0
    }
442
0
    outend = out + *outlen;
443
0
    if ((*inlenb % 2) == 1)
444
0
        (*inlenb)--;
445
0
    inlen = *inlenb / 2;
446
0
    inend = in + inlen;
447
0
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
448
0
        if (xmlLittleEndian) {
449
0
      c= *in++;
450
0
  } else {
451
0
      tmp = (unsigned char *) in;
452
0
      c = *tmp++;
453
0
      c = c | (*tmp << 8);
454
0
      in++;
455
0
  }
456
0
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
457
0
      if (in >= inend) {           /* handle split mutli-byte characters */
458
0
    break;
459
0
      }
460
0
      if (xmlLittleEndian) {
461
0
    d = *in++;
462
0
      } else {
463
0
    tmp = (unsigned char *) in;
464
0
    d = *tmp++;
465
0
    d = d | (*tmp << 8);
466
0
    in++;
467
0
      }
468
0
            if ((d & 0xFC00) == 0xDC00) {
469
0
                c &= 0x03FF;
470
0
                c <<= 10;
471
0
                c |= d & 0x03FF;
472
0
                c += 0x10000;
473
0
            }
474
0
            else {
475
0
    *outlen = out - outstart;
476
0
    *inlenb = processed - inb;
477
0
          return(XML_ENC_ERR_INPUT);
478
0
      }
479
0
        }
480
481
  /* assertion: c is a single UTF-4 value */
482
0
        if (out >= outend)
483
0
      break;
484
0
        if      (c <    0x80) {  *out++=  c;                bits= -6; }
485
0
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
486
0
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
487
0
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
488
489
0
        for ( ; bits >= 0; bits-= 6) {
490
0
            if (out >= outend)
491
0
          break;
492
0
            *out++= ((c >> bits) & 0x3F) | 0x80;
493
0
        }
494
0
  processed = (const unsigned char*) in;
495
0
    }
496
0
    *outlen = out - outstart;
497
0
    *inlenb = processed - inb;
498
0
    return(*outlen);
499
0
}
500
501
#ifdef LIBXML_OUTPUT_ENABLED
502
/**
503
 * UTF8ToUTF16LE:
504
 * @outb:  a pointer to an array of bytes to store the result
505
 * @outlen:  the length of @outb
506
 * @in:  a pointer to an array of UTF-8 chars
507
 * @inlen:  the length of @in
508
 *
509
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
510
 * block of chars out.
511
 *
512
 * Returns the number of bytes written or an XML_ENC_ERR code.
513
 */
514
static int
515
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
516
            const unsigned char* in, int *inlen)
517
0
{
518
0
    unsigned short* out = (unsigned short *) (void *) outb;
519
0
    const unsigned char* processed = in;
520
0
    const unsigned char *const instart = in;
521
0
    unsigned short* outstart= out;
522
0
    unsigned short* outend;
523
0
    const unsigned char* inend;
524
0
    unsigned int c, d;
525
0
    int trailing;
526
0
    unsigned char *tmp;
527
0
    unsigned short tmp1, tmp2;
528
529
    /* UTF16LE encoding has no BOM */
530
0
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
531
0
        return(XML_ENC_ERR_INTERNAL);
532
0
    if (in == NULL) {
533
0
  *outlen = 0;
534
0
  *inlen = 0;
535
0
  return(0);
536
0
    }
537
0
    inend= in + *inlen;
538
0
    outend = out + (*outlen / 2);
539
0
    while (in < inend) {
540
0
      d= *in++;
541
0
      if      (d < 0x80)  { c= d; trailing= 0; }
542
0
      else if (d < 0xC0) {
543
          /* trailing byte in leading position */
544
0
    *outlen = (out - outstart) * 2;
545
0
    *inlen = processed - instart;
546
0
    return(XML_ENC_ERR_INPUT);
547
0
      } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
548
0
      else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
549
0
      else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
550
0
      else {
551
  /* no chance for this in UTF-16 */
552
0
  *outlen = (out - outstart) * 2;
553
0
  *inlen = processed - instart;
554
0
  return(XML_ENC_ERR_INPUT);
555
0
      }
556
557
0
      if (inend - in < trailing) {
558
0
          break;
559
0
      }
560
561
0
      for ( ; trailing; trailing--) {
562
0
          if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
563
0
        break;
564
0
          c <<= 6;
565
0
          c |= d & 0x3F;
566
0
      }
567
568
      /* assertion: c is a single UTF-4 value */
569
0
        if (c < 0x10000) {
570
0
            if (out >= outend)
571
0
          break;
572
0
      if (xmlLittleEndian) {
573
0
    *out++ = c;
574
0
      } else {
575
0
    tmp = (unsigned char *) out;
576
0
    *tmp = (unsigned char) c; /* Explicit truncation */
577
0
    *(tmp + 1) = c >> 8 ;
578
0
    out++;
579
0
      }
580
0
        }
581
0
        else if (c < 0x110000) {
582
0
            if (out+1 >= outend)
583
0
          break;
584
0
            c -= 0x10000;
585
0
      if (xmlLittleEndian) {
586
0
    *out++ = 0xD800 | (c >> 10);
587
0
    *out++ = 0xDC00 | (c & 0x03FF);
588
0
      } else {
589
0
    tmp1 = 0xD800 | (c >> 10);
590
0
    tmp = (unsigned char *) out;
591
0
    *tmp = (unsigned char) tmp1; /* Explicit truncation */
592
0
    *(tmp + 1) = tmp1 >> 8;
593
0
    out++;
594
595
0
    tmp2 = 0xDC00 | (c & 0x03FF);
596
0
    tmp = (unsigned char *) out;
597
0
    *tmp  = (unsigned char) tmp2; /* Explicit truncation */
598
0
    *(tmp + 1) = tmp2 >> 8;
599
0
    out++;
600
0
      }
601
0
        }
602
0
        else
603
0
      break;
604
0
  processed = in;
605
0
    }
606
0
    *outlen = (out - outstart) * 2;
607
0
    *inlen = processed - instart;
608
0
    return(*outlen);
609
0
}
610
611
/**
 * UTF8ToUTF16:
 * @outb:  destination buffer
 * @outlen:  on entry the capacity of @outb, on return the bytes stored
 * @in:  source buffer holding UTF-8 data, or NULL for initialization
 * @inlen:  on entry the size of @in, on return the bytes consumed
 *
 * UTF-8 to UTF-16 converter. With a NULL @in it emits the UTF-16LE byte
 * order mark (FF FE) if @outb has room for it, otherwise nothing. Any
 * other call is forwarded to UTF8ToUTF16LE().
 *
 * Returns the number of bytes written or an XML_ENC_ERR code.
 */
static int
UTF8ToUTF16(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    int emitted = 0;

    if (in != NULL)
        return (UTF8ToUTF16LE(outb, outlen, in, inlen));

    /* Initialization call: start the stream with the UTF-16LE BOM. */
    if (*outlen >= 2) {
        outb[0] = 0xFF;
        outb[1] = 0xFE;
        emitted = 2;
    }
    *outlen = emitted;
    *inlen = 0;
    return(emitted);
}
644
#endif /* LIBXML_OUTPUT_ENABLED */
645
646
/**
647
 * UTF16BEToUTF8:
648
 * @out:  a pointer to an array of bytes to store the result
649
 * @outlen:  the length of @out
650
 * @inb:  a pointer to an array of UTF-16 passed as a byte array
651
 * @inlenb:  the length of @in in UTF-16 chars
652
 *
653
 * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
654
 * block of chars out. This function assumes the endian property
655
 * is the same between the native type of this machine and the
656
 * inputed one.
657
 *
658
 * Returns the number of bytes written or an XML_ENC_ERR code.
659
 *
660
 * The value of *inlen after return is the number of octets consumed
661
 * if the return value is positive, else unpredictable.
662
 */
663
static int
664
UTF16BEToUTF8(unsigned char* out, int *outlen,
665
            const unsigned char* inb, int *inlenb)
666
0
{
667
0
    unsigned char* outstart = out;
668
0
    const unsigned char* processed = inb;
669
0
    unsigned char* outend;
670
0
    unsigned short* in = (unsigned short *) (void *) inb;
671
0
    unsigned short* inend;
672
0
    unsigned int c, d, inlen;
673
0
    unsigned char *tmp;
674
0
    int bits;
675
676
0
    if (*outlen == 0) {
677
0
        *inlenb = 0;
678
0
        return(0);
679
0
    }
680
0
    outend = out + *outlen;
681
0
    if ((*inlenb % 2) == 1)
682
0
        (*inlenb)--;
683
0
    inlen = *inlenb / 2;
684
0
    inend= in + inlen;
685
0
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
686
0
  if (xmlLittleEndian) {
687
0
      tmp = (unsigned char *) in;
688
0
      c = *tmp++;
689
0
      c = (c << 8) | *tmp;
690
0
      in++;
691
0
  } else {
692
0
      c= *in++;
693
0
  }
694
0
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
695
0
      if (in >= inend) {           /* handle split mutli-byte characters */
696
0
                break;
697
0
      }
698
0
      if (xmlLittleEndian) {
699
0
    tmp = (unsigned char *) in;
700
0
    d = *tmp++;
701
0
    d = (d << 8) | *tmp;
702
0
    in++;
703
0
      } else {
704
0
    d= *in++;
705
0
      }
706
0
            if ((d & 0xFC00) == 0xDC00) {
707
0
                c &= 0x03FF;
708
0
                c <<= 10;
709
0
                c |= d & 0x03FF;
710
0
                c += 0x10000;
711
0
            }
712
0
            else {
713
0
    *outlen = out - outstart;
714
0
    *inlenb = processed - inb;
715
0
          return(XML_ENC_ERR_INPUT);
716
0
      }
717
0
        }
718
719
  /* assertion: c is a single UTF-4 value */
720
0
        if (out >= outend)
721
0
      break;
722
0
        if      (c <    0x80) {  *out++=  c;                bits= -6; }
723
0
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
724
0
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
725
0
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
726
727
0
        for ( ; bits >= 0; bits-= 6) {
728
0
            if (out >= outend)
729
0
          break;
730
0
            *out++= ((c >> bits) & 0x3F) | 0x80;
731
0
        }
732
0
  processed = (const unsigned char*) in;
733
0
    }
734
0
    *outlen = out - outstart;
735
0
    *inlenb = processed - inb;
736
0
    return(*outlen);
737
0
}
738
739
#ifdef LIBXML_OUTPUT_ENABLED
740
/**
741
 * UTF8ToUTF16BE:
742
 * @outb:  a pointer to an array of bytes to store the result
743
 * @outlen:  the length of @outb
744
 * @in:  a pointer to an array of UTF-8 chars
745
 * @inlen:  the length of @in
746
 *
747
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
748
 * block of chars out.
749
 *
750
 * Returns the number of bytes written or an XML_ENC_ERR code.
751
 */
752
static int
753
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
754
            const unsigned char* in, int *inlen)
755
0
{
756
0
    unsigned short* out = (unsigned short *) (void *) outb;
757
0
    const unsigned char* processed = in;
758
0
    const unsigned char *const instart = in;
759
0
    unsigned short* outstart= out;
760
0
    unsigned short* outend;
761
0
    const unsigned char* inend;
762
0
    unsigned int c, d;
763
0
    int trailing;
764
0
    unsigned char *tmp;
765
0
    unsigned short tmp1, tmp2;
766
767
    /* UTF-16BE has no BOM */
768
0
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
769
0
        return(XML_ENC_ERR_INTERNAL);
770
0
    if (in == NULL) {
771
0
  *outlen = 0;
772
0
  *inlen = 0;
773
0
  return(0);
774
0
    }
775
0
    inend= in + *inlen;
776
0
    outend = out + (*outlen / 2);
777
0
    while (in < inend) {
778
0
      d= *in++;
779
0
      if      (d < 0x80)  { c= d; trailing= 0; }
780
0
      else if (d < 0xC0)  {
781
          /* trailing byte in leading position */
782
0
    *outlen = out - outstart;
783
0
    *inlen = processed - instart;
784
0
    return(XML_ENC_ERR_INPUT);
785
0
      } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
786
0
      else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
787
0
      else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
788
0
      else {
789
          /* no chance for this in UTF-16 */
790
0
    *outlen = out - outstart;
791
0
    *inlen = processed - instart;
792
0
    return(XML_ENC_ERR_INPUT);
793
0
      }
794
795
0
      if (inend - in < trailing) {
796
0
          break;
797
0
      }
798
799
0
      for ( ; trailing; trailing--) {
800
0
          if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))  break;
801
0
          c <<= 6;
802
0
          c |= d & 0x3F;
803
0
      }
804
805
      /* assertion: c is a single UTF-4 value */
806
0
        if (c < 0x10000) {
807
0
            if (out >= outend)  break;
808
0
      if (xmlLittleEndian) {
809
0
    tmp = (unsigned char *) out;
810
0
    *tmp = c >> 8;
811
0
    *(tmp + 1) = (unsigned char) c; /* Explicit truncation */
812
0
    out++;
813
0
      } else {
814
0
    *out++ = c;
815
0
      }
816
0
        }
817
0
        else if (c < 0x110000) {
818
0
            if (out+1 >= outend)  break;
819
0
            c -= 0x10000;
820
0
      if (xmlLittleEndian) {
821
0
    tmp1 = 0xD800 | (c >> 10);
822
0
    tmp = (unsigned char *) out;
823
0
    *tmp = tmp1 >> 8;
824
0
    *(tmp + 1) = (unsigned char) tmp1; /* Explicit truncation */
825
0
    out++;
826
827
0
    tmp2 = 0xDC00 | (c & 0x03FF);
828
0
    tmp = (unsigned char *) out;
829
0
    *tmp = tmp2 >> 8;
830
0
    *(tmp + 1) = (unsigned char) tmp2; /* Explicit truncation */
831
0
    out++;
832
0
      } else {
833
0
    *out++ = 0xD800 | (c >> 10);
834
0
    *out++ = 0xDC00 | (c & 0x03FF);
835
0
      }
836
0
        }
837
0
        else
838
0
      break;
839
0
  processed = in;
840
0
    }
841
0
    *outlen = (out - outstart) * 2;
842
0
    *inlen = processed - instart;
843
0
    return(*outlen);
844
0
}
845
#endif /* LIBXML_OUTPUT_ENABLED */
846
847
/************************************************************************
848
 *                  *
849
 *    Generic encoding handling routines      *
850
 *                  *
851
 ************************************************************************/
852
853
/**
854
 * xmlDetectCharEncoding:
855
 * @in:  a pointer to the first bytes of the XML entity, must be at least
856
 *       2 bytes long (at least 4 if encoding is UTF4 variant).
857
 * @len:  pointer to the length of the buffer
858
 *
859
 * Guess the encoding of the entity using the first bytes of the entity content
860
 * according to the non-normative appendix F of the XML-1.0 recommendation.
861
 *
862
 * Returns one of the XML_CHAR_ENCODING_... values.
863
 */
864
xmlCharEncoding
865
xmlDetectCharEncoding(const unsigned char* in, int len)
866
0
{
867
0
    if (in == NULL)
868
0
        return(XML_CHAR_ENCODING_NONE);
869
0
    if (len >= 4) {
870
0
  if ((in[0] == 0x00) && (in[1] == 0x00) &&
871
0
      (in[2] == 0x00) && (in[3] == 0x3C))
872
0
      return(XML_CHAR_ENCODING_UCS4BE);
873
0
  if ((in[0] == 0x3C) && (in[1] == 0x00) &&
874
0
      (in[2] == 0x00) && (in[3] == 0x00))
875
0
      return(XML_CHAR_ENCODING_UCS4LE);
876
0
  if ((in[0] == 0x00) && (in[1] == 0x00) &&
877
0
      (in[2] == 0x3C) && (in[3] == 0x00))
878
0
      return(XML_CHAR_ENCODING_UCS4_2143);
879
0
  if ((in[0] == 0x00) && (in[1] == 0x3C) &&
880
0
      (in[2] == 0x00) && (in[3] == 0x00))
881
0
      return(XML_CHAR_ENCODING_UCS4_3412);
882
0
  if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
883
0
      (in[2] == 0xA7) && (in[3] == 0x94))
884
0
      return(XML_CHAR_ENCODING_EBCDIC);
885
0
  if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
886
0
      (in[2] == 0x78) && (in[3] == 0x6D))
887
0
      return(XML_CHAR_ENCODING_UTF8);
888
  /*
889
   * Although not part of the recommendation, we also
890
   * attempt an "auto-recognition" of UTF-16LE and
891
   * UTF-16BE encodings.
892
   */
893
0
  if ((in[0] == 0x3C) && (in[1] == 0x00) &&
894
0
      (in[2] == 0x3F) && (in[3] == 0x00))
895
0
      return(XML_CHAR_ENCODING_UTF16LE);
896
0
  if ((in[0] == 0x00) && (in[1] == 0x3C) &&
897
0
      (in[2] == 0x00) && (in[3] == 0x3F))
898
0
      return(XML_CHAR_ENCODING_UTF16BE);
899
0
    }
900
0
    if (len >= 3) {
901
  /*
902
   * Errata on XML-1.0 June 20 2001
903
   * We now allow an UTF8 encoded BOM
904
   */
905
0
  if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
906
0
      (in[2] == 0xBF))
907
0
      return(XML_CHAR_ENCODING_UTF8);
908
0
    }
909
    /* For UTF-16 we can recognize by the BOM */
910
0
    if (len >= 2) {
911
0
  if ((in[0] == 0xFE) && (in[1] == 0xFF))
912
0
      return(XML_CHAR_ENCODING_UTF16BE);
913
0
  if ((in[0] == 0xFF) && (in[1] == 0xFE))
914
0
      return(XML_CHAR_ENCODING_UTF16LE);
915
0
    }
916
0
    return(XML_CHAR_ENCODING_NONE);
917
0
}
918
919
/**
920
 * xmlCleanupEncodingAliases:
921
 *
922
 * Unregisters all aliases
923
 */
924
void
925
0
xmlCleanupEncodingAliases(void) {
926
0
    int i;
927
928
0
    if (xmlCharEncodingAliases == NULL)
929
0
  return;
930
931
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
932
0
  if (xmlCharEncodingAliases[i].name != NULL)
933
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
934
0
  if (xmlCharEncodingAliases[i].alias != NULL)
935
0
      xmlFree((char *) xmlCharEncodingAliases[i].alias);
936
0
    }
937
0
    xmlCharEncodingAliasesNb = 0;
938
0
    xmlCharEncodingAliasesMax = 0;
939
0
    xmlFree(xmlCharEncodingAliases);
940
0
    xmlCharEncodingAliases = NULL;
941
0
}
942
943
/**
944
 * xmlGetEncodingAlias:
945
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
946
 *
947
 * Lookup an encoding name for the given alias.
948
 *
949
 * Returns NULL if not found, otherwise the original name
950
 */
951
const char *
952
0
xmlGetEncodingAlias(const char *alias) {
953
0
    int i;
954
0
    char upper[100];
955
956
0
    if (alias == NULL)
957
0
  return(NULL);
958
959
0
    if (xmlCharEncodingAliases == NULL)
960
0
  return(NULL);
961
962
0
    for (i = 0;i < 99;i++) {
963
0
        upper[i] = (char) toupper((unsigned char) alias[i]);
964
0
  if (upper[i] == 0) break;
965
0
    }
966
0
    upper[i] = 0;
967
968
    /*
969
     * Walk down the list looking for a definition of the alias
970
     */
971
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
972
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
973
0
      return(xmlCharEncodingAliases[i].name);
974
0
  }
975
0
    }
976
0
    return(NULL);
977
0
}
978
979
/**
980
 * xmlAddEncodingAlias:
981
 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
982
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
983
 *
984
 * Registers an alias @alias for an encoding named @name. Existing alias
985
 * will be overwritten.
986
 *
987
 * Returns 0 in case of success, -1 in case of error
988
 */
989
int
990
0
xmlAddEncodingAlias(const char *name, const char *alias) {
991
0
    int i;
992
0
    char upper[100];
993
0
    char *nameCopy, *aliasCopy;
994
995
0
    if ((name == NULL) || (alias == NULL))
996
0
  return(-1);
997
998
0
    for (i = 0;i < 99;i++) {
999
0
        upper[i] = (char) toupper((unsigned char) alias[i]);
1000
0
  if (upper[i] == 0) break;
1001
0
    }
1002
0
    upper[i] = 0;
1003
1004
0
    if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1005
0
        xmlCharEncodingAliasPtr tmp;
1006
0
        size_t newSize = xmlCharEncodingAliasesMax ?
1007
0
                         xmlCharEncodingAliasesMax * 2 :
1008
0
                         20;
1009
1010
0
        tmp = (xmlCharEncodingAliasPtr)
1011
0
              xmlRealloc(xmlCharEncodingAliases,
1012
0
                         newSize * sizeof(xmlCharEncodingAlias));
1013
0
        if (tmp == NULL)
1014
0
            return(-1);
1015
0
        xmlCharEncodingAliases = tmp;
1016
0
        xmlCharEncodingAliasesMax = newSize;
1017
0
    }
1018
1019
    /*
1020
     * Walk down the list looking for a definition of the alias
1021
     */
1022
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1023
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1024
      /*
1025
       * Replace the definition.
1026
       */
1027
0
      nameCopy = xmlMemStrdup(name);
1028
0
            if (nameCopy == NULL)
1029
0
                return(-1);
1030
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1031
0
      xmlCharEncodingAliases[i].name = nameCopy;
1032
0
      return(0);
1033
0
  }
1034
0
    }
1035
    /*
1036
     * Add the definition
1037
     */
1038
0
    nameCopy = xmlMemStrdup(name);
1039
0
    if (nameCopy == NULL)
1040
0
        return(-1);
1041
0
    aliasCopy = xmlMemStrdup(upper);
1042
0
    if (aliasCopy == NULL) {
1043
0
        xmlFree(nameCopy);
1044
0
        return(-1);
1045
0
    }
1046
0
    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = nameCopy;
1047
0
    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = aliasCopy;
1048
0
    xmlCharEncodingAliasesNb++;
1049
0
    return(0);
1050
0
}
1051
1052
/**
1053
 * xmlDelEncodingAlias:
1054
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1055
 *
1056
 * Unregisters an encoding alias @alias
1057
 *
1058
 * Returns 0 in case of success, -1 in case of error
1059
 */
1060
int
1061
0
xmlDelEncodingAlias(const char *alias) {
1062
0
    int i;
1063
1064
0
    if (alias == NULL)
1065
0
  return(-1);
1066
1067
0
    if (xmlCharEncodingAliases == NULL)
1068
0
  return(-1);
1069
    /*
1070
     * Walk down the list looking for a definition of the alias
1071
     */
1072
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1073
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1074
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1075
0
      xmlFree((char *) xmlCharEncodingAliases[i].alias);
1076
0
      xmlCharEncodingAliasesNb--;
1077
0
      memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1078
0
        sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1079
0
      return(0);
1080
0
  }
1081
0
    }
1082
0
    return(-1);
1083
0
}
1084
1085
/**
1086
 * xmlParseCharEncoding:
1087
 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1088
 *
1089
 * Compare the string to the encoding schemes already known. Note
1090
 * that the comparison is case insensitive accordingly to the section
1091
 * [XML] 4.3.3 Character Encoding in Entities.
1092
 *
1093
 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1094
 * if not recognized.
1095
 */
1096
xmlCharEncoding
1097
xmlParseCharEncoding(const char* name)
1098
0
{
1099
0
    const char *alias;
1100
0
    char upper[500];
1101
0
    int i;
1102
1103
0
    if (name == NULL)
1104
0
  return(XML_CHAR_ENCODING_NONE);
1105
1106
    /*
1107
     * Do the alias resolution
1108
     */
1109
0
    alias = xmlGetEncodingAlias(name);
1110
0
    if (alias != NULL)
1111
0
  name = alias;
1112
1113
0
    for (i = 0;i < 499;i++) {
1114
0
        upper[i] = (char) toupper((unsigned char) name[i]);
1115
0
  if (upper[i] == 0) break;
1116
0
    }
1117
0
    upper[i] = 0;
1118
1119
0
    if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1120
0
    if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1121
0
    if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1122
1123
    /*
1124
     * NOTE: if we were able to parse this, the endianness of UTF16 is
1125
     *       already found and in use
1126
     */
1127
0
    if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1128
0
    if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1129
1130
0
    if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1131
0
    if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1132
0
    if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1133
1134
    /*
1135
     * NOTE: if we were able to parse this, the endianness of UCS4 is
1136
     *       already found and in use
1137
     */
1138
0
    if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1139
0
    if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1140
0
    if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1141
1142
1143
0
    if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1144
0
    if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1145
0
    if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1146
1147
0
    if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1148
0
    if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1149
0
    if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1150
1151
0
    if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1152
0
    if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1153
0
    if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1154
0
    if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1155
0
    if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1156
0
    if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1157
0
    if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1158
1159
0
    if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1160
0
    if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1161
0
    if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1162
1163
0
    return(XML_CHAR_ENCODING_ERROR);
1164
0
}
1165
1166
/**
1167
 * xmlGetCharEncodingName:
1168
 * @enc:  the encoding
1169
 *
1170
 * The "canonical" name for XML encoding.
1171
 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1172
 * Section 4.3.3  Character Encoding in Entities
1173
 *
1174
 * Returns the canonical name for the given encoding
1175
 */
1176
1177
const char*
1178
0
xmlGetCharEncodingName(xmlCharEncoding enc) {
1179
0
    switch (enc) {
1180
0
        case XML_CHAR_ENCODING_ERROR:
1181
0
      return(NULL);
1182
0
        case XML_CHAR_ENCODING_NONE:
1183
0
      return(NULL);
1184
0
        case XML_CHAR_ENCODING_UTF8:
1185
0
      return("UTF-8");
1186
0
        case XML_CHAR_ENCODING_UTF16LE:
1187
0
      return("UTF-16");
1188
0
        case XML_CHAR_ENCODING_UTF16BE:
1189
0
      return("UTF-16");
1190
0
        case XML_CHAR_ENCODING_EBCDIC:
1191
0
            return("EBCDIC");
1192
0
        case XML_CHAR_ENCODING_UCS4LE:
1193
0
            return("ISO-10646-UCS-4");
1194
0
        case XML_CHAR_ENCODING_UCS4BE:
1195
0
            return("ISO-10646-UCS-4");
1196
0
        case XML_CHAR_ENCODING_UCS4_2143:
1197
0
            return("ISO-10646-UCS-4");
1198
0
        case XML_CHAR_ENCODING_UCS4_3412:
1199
0
            return("ISO-10646-UCS-4");
1200
0
        case XML_CHAR_ENCODING_UCS2:
1201
0
            return("ISO-10646-UCS-2");
1202
0
        case XML_CHAR_ENCODING_8859_1:
1203
0
      return("ISO-8859-1");
1204
0
        case XML_CHAR_ENCODING_8859_2:
1205
0
      return("ISO-8859-2");
1206
0
        case XML_CHAR_ENCODING_8859_3:
1207
0
      return("ISO-8859-3");
1208
0
        case XML_CHAR_ENCODING_8859_4:
1209
0
      return("ISO-8859-4");
1210
0
        case XML_CHAR_ENCODING_8859_5:
1211
0
      return("ISO-8859-5");
1212
0
        case XML_CHAR_ENCODING_8859_6:
1213
0
      return("ISO-8859-6");
1214
0
        case XML_CHAR_ENCODING_8859_7:
1215
0
      return("ISO-8859-7");
1216
0
        case XML_CHAR_ENCODING_8859_8:
1217
0
      return("ISO-8859-8");
1218
0
        case XML_CHAR_ENCODING_8859_9:
1219
0
      return("ISO-8859-9");
1220
0
        case XML_CHAR_ENCODING_2022_JP:
1221
0
            return("ISO-2022-JP");
1222
0
        case XML_CHAR_ENCODING_SHIFT_JIS:
1223
0
            return("Shift-JIS");
1224
0
        case XML_CHAR_ENCODING_EUC_JP:
1225
0
            return("EUC-JP");
1226
0
  case XML_CHAR_ENCODING_ASCII:
1227
0
      return(NULL);
1228
0
    }
1229
0
    return(NULL);
1230
0
}
1231
1232
/************************************************************************
1233
 *                  *
1234
 *      Char encoding handlers        *
1235
 *                  *
1236
 ************************************************************************/
1237
1238
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) && \
1239
    defined(LIBXML_ISO8859X_ENABLED)
1240
1241
#define DECLARE_ISO_FUNCS(n) \
1242
    static int ISO8859_##n##ToUTF8(unsigned char* out, int *outlen, \
1243
                                   const unsigned char* in, int *inlen); \
1244
    static int UTF8ToISO8859_##n(unsigned char* out, int *outlen, \
1245
                                 const unsigned char* in, int *inlen);
1246
1247
/** DOC_DISABLE */
1248
DECLARE_ISO_FUNCS(2)
1249
DECLARE_ISO_FUNCS(3)
1250
DECLARE_ISO_FUNCS(4)
1251
DECLARE_ISO_FUNCS(5)
1252
DECLARE_ISO_FUNCS(6)
1253
DECLARE_ISO_FUNCS(7)
1254
DECLARE_ISO_FUNCS(8)
1255
DECLARE_ISO_FUNCS(9)
1256
DECLARE_ISO_FUNCS(10)
1257
DECLARE_ISO_FUNCS(11)
1258
DECLARE_ISO_FUNCS(13)
1259
DECLARE_ISO_FUNCS(14)
1260
DECLARE_ISO_FUNCS(15)
1261
DECLARE_ISO_FUNCS(16)
1262
/** DOC_ENABLE */
1263
1264
#endif /* LIBXML_ISO8859X_ENABLED */
1265
1266
#ifdef LIBXML_ICONV_ENABLED
1267
  #define EMPTY_ICONV , (iconv_t) 0, (iconv_t) 0
1268
#else
1269
  #define EMPTY_ICONV
1270
#endif
1271
1272
#ifdef LIBXML_ICU_ENABLED
1273
  #define EMPTY_UCONV , NULL, NULL
1274
#else
1275
  #define EMPTY_UCONV
1276
#endif
1277
1278
#define MAKE_HANDLER(name, in, out) \
1279
    { (char *) name, in, out EMPTY_ICONV EMPTY_UCONV }
1280
1281
/*
 * Table of the handlers that are compiled into the library.
 *
 * Entry order matters: other code in this file takes the address of
 * individual entries by index, and both preprocessor branches below keep
 * UTF-16LE, UTF-16BE, UTF-16, ISO-8859-1, ASCII and US-ASCII at
 * positions 1 to 6. Without LIBXML_OUTPUT_ENABLED the output converter
 * of each entry is NULL.
 */
static const xmlCharEncodingHandler defaultHandlers[] = {
1282
    MAKE_HANDLER("UTF-8", UTF8ToUTF8, UTF8ToUTF8)
1283
#ifdef LIBXML_OUTPUT_ENABLED
1284
    ,MAKE_HANDLER("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE)
1285
    ,MAKE_HANDLER("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE)
1286
    ,MAKE_HANDLER("UTF-16", UTF16LEToUTF8, UTF8ToUTF16)
1287
    ,MAKE_HANDLER("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1)
1288
    ,MAKE_HANDLER("ASCII", asciiToUTF8, UTF8Toascii)
1289
    ,MAKE_HANDLER("US-ASCII", asciiToUTF8, UTF8Toascii)
1290
#ifdef LIBXML_HTML_ENABLED
1291
    /* output-only pseudo encoding, it has no input converter */
    ,MAKE_HANDLER("HTML", NULL, UTF8ToHtml)
1292
#endif
1293
#else
1294
    ,MAKE_HANDLER("UTF-16LE", UTF16LEToUTF8, NULL)
1295
    ,MAKE_HANDLER("UTF-16BE", UTF16BEToUTF8, NULL)
1296
    ,MAKE_HANDLER("UTF-16", UTF16LEToUTF8, NULL)
1297
    ,MAKE_HANDLER("ISO-8859-1", isolat1ToUTF8, NULL)
1298
    ,MAKE_HANDLER("ASCII", asciiToUTF8, NULL)
1299
    ,MAKE_HANDLER("US-ASCII", asciiToUTF8, NULL)
1300
#endif /* LIBXML_OUTPUT_ENABLED */
1301
1302
/* built-in ISO-8859-x converters, only used when neither iconv nor ICU is available */
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) && \
1303
    defined(LIBXML_ISO8859X_ENABLED)
1304
    ,MAKE_HANDLER("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2)
1305
    ,MAKE_HANDLER("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3)
1306
    ,MAKE_HANDLER("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4)
1307
    ,MAKE_HANDLER("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5)
1308
    ,MAKE_HANDLER("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6)
1309
    ,MAKE_HANDLER("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7)
1310
    ,MAKE_HANDLER("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8)
1311
    ,MAKE_HANDLER("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9)
1312
    ,MAKE_HANDLER("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10)
1313
    ,MAKE_HANDLER("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11)
1314
    ,MAKE_HANDLER("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13)
1315
    ,MAKE_HANDLER("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14)
1316
    ,MAKE_HANDLER("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15)
1317
    ,MAKE_HANDLER("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16)
1318
#endif
1319
};
1320
1321
#define NUM_DEFAULT_HANDLERS \
1322
0
    (sizeof(defaultHandlers) / sizeof(defaultHandlers[0]))
1323
1324
/*
 * Shortcuts to individual entries of defaultHandlers[]. The numeric
 * indices are positions in that table, so they have to be updated
 * whenever the leading entries of the table are reordered.
 */
static const xmlCharEncodingHandler *xmlUTF16LEHandler = &defaultHandlers[1];
1325
static const xmlCharEncodingHandler *xmlUTF16BEHandler = &defaultHandlers[2];
1326
static const xmlCharEncodingHandler *xmlLatin1Handler = &defaultHandlers[4];
1327
static const xmlCharEncodingHandler *xmlAsciiHandler = &defaultHandlers[5];
1328
1329
/* the size should be growable, but it's not a big deal ... */
1330
0
#define MAX_ENCODING_HANDLERS 50
1331
/* registry filled by xmlRegisterCharEncodingHandler, NULL until the first registration */
static xmlCharEncodingHandlerPtr *handlers = NULL;
1332
/* number of entries currently stored in handlers[] */
static int nbCharEncodingHandler = 0;
1333
1334
/**
1335
 * xmlNewCharEncodingHandler:
1336
 * @name:  the encoding name, in UTF-8 format (ASCII actually)
1337
 * @input:  the xmlCharEncodingInputFunc to read that encoding
1338
 * @output:  the xmlCharEncodingOutputFunc to write that encoding
1339
 *
1340
 * Create and registers an xmlCharEncodingHandler.
1341
 *
1342
 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1343
 */
1344
xmlCharEncodingHandlerPtr
1345
xmlNewCharEncodingHandler(const char *name,
1346
                          xmlCharEncodingInputFunc input,
1347
0
                          xmlCharEncodingOutputFunc output) {
1348
0
    xmlCharEncodingHandlerPtr handler;
1349
0
    const char *alias;
1350
0
    char upper[500];
1351
0
    int i;
1352
0
    char *up = NULL;
1353
1354
    /*
1355
     * Do the alias resolution
1356
     */
1357
0
    alias = xmlGetEncodingAlias(name);
1358
0
    if (alias != NULL)
1359
0
  name = alias;
1360
1361
    /*
1362
     * Keep only the uppercase version of the encoding.
1363
     */
1364
0
    if (name == NULL)
1365
0
  return(NULL);
1366
0
    for (i = 0;i < 499;i++) {
1367
0
        upper[i] = (char) toupper((unsigned char) name[i]);
1368
0
  if (upper[i] == 0) break;
1369
0
    }
1370
0
    upper[i] = 0;
1371
0
    up = xmlMemStrdup(upper);
1372
0
    if (up == NULL)
1373
0
  return(NULL);
1374
1375
    /*
1376
     * allocate and fill-up an handler block.
1377
     */
1378
0
    handler = (xmlCharEncodingHandlerPtr)
1379
0
              xmlMalloc(sizeof(xmlCharEncodingHandler));
1380
0
    if (handler == NULL) {
1381
0
        xmlFree(up);
1382
0
  return(NULL);
1383
0
    }
1384
0
    memset(handler, 0, sizeof(xmlCharEncodingHandler));
1385
0
    handler->input = input;
1386
0
    handler->output = output;
1387
0
    handler->name = up;
1388
1389
0
#ifdef LIBXML_ICONV_ENABLED
1390
0
    handler->iconv_in = NULL;
1391
0
    handler->iconv_out = NULL;
1392
0
#endif
1393
#ifdef LIBXML_ICU_ENABLED
1394
    handler->uconv_in = NULL;
1395
    handler->uconv_out = NULL;
1396
#endif
1397
1398
    /*
1399
     * registers and returns the handler.
1400
     */
1401
0
    xmlRegisterCharEncodingHandler(handler);
1402
0
    return(handler);
1403
0
}
1404
1405
/**
 * xmlInitCharEncodingHandlers:
 *
 * DEPRECATED: Alias for xmlInitParser.
 *
 * Kept for API compatibility; it only forwards to xmlInitParser.
 */
void
xmlInitCharEncodingHandlers(void)
{
    xmlInitParser();
}
1414
1415
/**
1416
 * xmlInitEncodingInternal:
1417
 *
1418
 * Initialize the char encoding support.
1419
 */
1420
void
1421
0
xmlInitEncodingInternal(void) {
1422
0
    unsigned short int tst = 0x1234;
1423
0
    unsigned char *ptr = (unsigned char *) &tst;
1424
1425
0
    if (*ptr == 0x12) xmlLittleEndian = 0;
1426
0
    else xmlLittleEndian = 1;
1427
0
}
1428
1429
/**
1430
 * xmlCleanupCharEncodingHandlers:
1431
 *
1432
 * DEPRECATED: This function will be made private. Call xmlCleanupParser
1433
 * to free global state but see the warnings there. xmlCleanupParser
1434
 * should be only called once at program exit. In most cases, you don't
1435
 * have call cleanup functions at all.
1436
 *
1437
 * Cleanup the memory allocated for the char encoding support, it
1438
 * unregisters all the encoding handlers and the aliases.
1439
 */
1440
void
1441
0
xmlCleanupCharEncodingHandlers(void) {
1442
0
    xmlCleanupEncodingAliases();
1443
1444
0
    if (handlers == NULL) return;
1445
1446
0
    for (;nbCharEncodingHandler > 0;) {
1447
0
        nbCharEncodingHandler--;
1448
0
  if (handlers[nbCharEncodingHandler] != NULL) {
1449
0
      if (handlers[nbCharEncodingHandler]->name != NULL)
1450
0
    xmlFree(handlers[nbCharEncodingHandler]->name);
1451
0
      xmlFree(handlers[nbCharEncodingHandler]);
1452
0
  }
1453
0
    }
1454
0
    xmlFree(handlers);
1455
0
    handlers = NULL;
1456
0
    nbCharEncodingHandler = 0;
1457
0
}
1458
1459
/**
1460
 * xmlRegisterCharEncodingHandler:
1461
 * @handler:  the xmlCharEncodingHandlerPtr handler block
1462
 *
1463
 * Register the char encoding handler, surprising, isn't it ?
1464
 */
1465
void
1466
0
xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1467
0
    if (handler == NULL)
1468
0
        return;
1469
0
    if (handlers == NULL) {
1470
0
        handlers = xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(handlers[0]));
1471
0
        if (handlers == NULL)
1472
0
            goto free_handler;
1473
0
    }
1474
1475
0
    if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS)
1476
0
        goto free_handler;
1477
0
    handlers[nbCharEncodingHandler++] = handler;
1478
0
    return;
1479
1480
0
free_handler:
1481
0
    if (handler != NULL) {
1482
0
        if (handler->name != NULL) {
1483
0
            xmlFree(handler->name);
1484
0
        }
1485
0
        xmlFree(handler);
1486
0
    }
1487
0
}
1488
1489
#ifdef LIBXML_ICONV_ENABLED
1490
static int
1491
0
xmlCreateIconvHandler(const char *name, xmlCharEncodingHandler **out) {
1492
0
    xmlCharEncodingHandlerPtr enc = NULL;
1493
0
    iconv_t icv_in = (iconv_t) -1;
1494
0
    iconv_t icv_out = (iconv_t) -1;
1495
0
    int ret;
1496
1497
0
    *out = NULL;
1498
1499
0
    icv_in = iconv_open("UTF-8", name);
1500
0
    if (icv_in == (iconv_t) -1) {
1501
0
        if (errno == EINVAL)
1502
0
            ret = XML_ERR_UNSUPPORTED_ENCODING;
1503
0
        else if (errno == ENOMEM)
1504
0
            ret = XML_ERR_NO_MEMORY;
1505
0
        else
1506
0
            ret = XML_ERR_SYSTEM;
1507
0
        goto error;
1508
0
    }
1509
1510
0
    icv_out = iconv_open(name, "UTF-8");
1511
0
    if (icv_out == (iconv_t) -1) {
1512
0
        if (errno == EINVAL)
1513
0
            ret = XML_ERR_UNSUPPORTED_ENCODING;
1514
0
        else if (errno == ENOMEM)
1515
0
            ret = XML_ERR_NO_MEMORY;
1516
0
        else
1517
0
            ret = XML_ERR_SYSTEM;
1518
0
        goto error;
1519
0
    }
1520
1521
0
    enc = xmlMalloc(sizeof(*enc));
1522
0
    if (enc == NULL) {
1523
0
        ret = XML_ERR_NO_MEMORY;
1524
0
        goto error;
1525
0
    }
1526
0
    memset(enc, 0, sizeof(*enc));
1527
1528
0
    enc->name = xmlMemStrdup(name);
1529
0
    if (enc->name == NULL) {
1530
0
        ret = XML_ERR_NO_MEMORY;
1531
0
        goto error;
1532
0
    }
1533
0
    enc->iconv_in = icv_in;
1534
0
    enc->iconv_out = icv_out;
1535
1536
0
    *out = enc;
1537
0
    return(0);
1538
1539
0
error:
1540
0
    if (enc != NULL)
1541
0
        xmlFree(enc);
1542
0
    if (icv_in != (iconv_t) -1)
1543
0
        iconv_close(icv_in);
1544
0
    if (icv_out != (iconv_t) -1)
1545
0
        iconv_close(icv_out);
1546
0
    return(ret);
1547
0
}
1548
#endif /* LIBXML_ICONV_ENABLED */
1549
1550
#ifdef LIBXML_ICU_ENABLED
1551
static int
1552
openIcuConverter(const char* name, int toUnicode, uconv_t **out)
1553
{
1554
    UErrorCode status;
1555
    uconv_t *conv;
1556
1557
    *out = NULL;
1558
1559
    conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
1560
    if (conv == NULL)
1561
        return(XML_ERR_NO_MEMORY);
1562
1563
    conv->pivot_source = conv->pivot_buf;
1564
    conv->pivot_target = conv->pivot_buf;
1565
1566
    status = U_ZERO_ERROR;
1567
    conv->uconv = ucnv_open(name, &status);
1568
    if (U_FAILURE(status))
1569
        goto error;
1570
1571
    status = U_ZERO_ERROR;
1572
    if (toUnicode) {
1573
        ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
1574
                                                NULL, NULL, NULL, &status);
1575
    }
1576
    else {
1577
        ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
1578
                                                NULL, NULL, NULL, &status);
1579
    }
1580
    if (U_FAILURE(status))
1581
        goto error;
1582
1583
    status = U_ZERO_ERROR;
1584
    conv->utf8 = ucnv_open("UTF-8", &status);
1585
    if (U_FAILURE(status))
1586
        goto error;
1587
1588
    *out = conv;
1589
    return(0);
1590
1591
error:
1592
    if (conv->uconv)
1593
        ucnv_close(conv->uconv);
1594
    xmlFree(conv);
1595
1596
    if (status == U_FILE_ACCESS_ERROR)
1597
        return(XML_ERR_UNSUPPORTED_ENCODING);
1598
    if (status == U_MEMORY_ALLOCATION_ERROR)
1599
        return(XML_ERR_NO_MEMORY);
1600
    return(XML_ERR_SYSTEM);
1601
}
1602
1603
static void
1604
closeIcuConverter(uconv_t *conv)
1605
{
1606
    if (conv == NULL)
1607
        return;
1608
    ucnv_close(conv->uconv);
1609
    ucnv_close(conv->utf8);
1610
    xmlFree(conv);
1611
}
1612
1613
static int
1614
xmlCreateUconvHandler(const char *name, xmlCharEncodingHandler **out) {
1615
    xmlCharEncodingHandlerPtr enc = NULL;
1616
    uconv_t *ucv_in = NULL;
1617
    uconv_t *ucv_out = NULL;
1618
    int ret;
1619
1620
    ret = openIcuConverter(name, 1, &ucv_in);
1621
    if (ret != 0)
1622
        goto error;
1623
    ret = openIcuConverter(name, 0, &ucv_out);
1624
    if (ret != 0)
1625
        goto error;
1626
1627
    enc = (xmlCharEncodingHandlerPtr)
1628
           xmlMalloc(sizeof(xmlCharEncodingHandler));
1629
    if (enc == NULL) {
1630
        ret = XML_ERR_NO_MEMORY;
1631
        goto error;
1632
    }
1633
    memset(enc, 0, sizeof(xmlCharEncodingHandler));
1634
1635
    enc->name = xmlMemStrdup(name);
1636
    if (enc->name == NULL) {
1637
        ret = XML_ERR_NO_MEMORY;
1638
        goto error;
1639
    }
1640
    enc->input = NULL;
1641
    enc->output = NULL;
1642
    enc->uconv_in = ucv_in;
1643
    enc->uconv_out = ucv_out;
1644
1645
    *out = enc;
1646
    return(0);
1647
1648
error:
1649
    if (enc != NULL)
1650
        xmlFree(enc);
1651
    if (ucv_in != NULL)
1652
        closeIcuConverter(ucv_in);
1653
    if (ucv_out != NULL)
1654
        closeIcuConverter(ucv_out);
1655
    return(ret);
1656
}
1657
#endif /* LIBXML_ICU_ENABLED */
1658
1659
/**
1660
 * xmlFindExtraHandler:
1661
 * @name:  a string describing the char encoding.
1662
 * @output:  boolean, use handler for output
1663
 * @out:  pointer to resulting handler
1664
 *
1665
 * Search the non-default handlers for an exact match.
1666
 *
1667
 * Returns 0 on success, 1 if no handler was found, -1 if a memory
1668
 * allocation failed.
1669
 */
1670
static int
1671
xmlFindExtraHandler(const char *name, int output,
1672
0
                    xmlCharEncodingHandler **out) {
1673
0
    int ret;
1674
0
    int i;
1675
1676
0
    (void) ret;
1677
1678
0
    if (handlers != NULL) {
1679
0
        for (i = 0; i < nbCharEncodingHandler; i++) {
1680
0
            xmlCharEncodingHandler *handler = handlers[i];
1681
1682
0
            if (!xmlStrcasecmp((const xmlChar *) name,
1683
0
                               (const xmlChar *) handler->name)) {
1684
0
                if (output) {
1685
0
                    if (handler->output != NULL) {
1686
0
                        *out = handler;
1687
0
                        return(0);
1688
0
                    }
1689
0
                } else {
1690
0
                    if (handler->input != NULL) {
1691
0
                        *out = handler;
1692
0
                        return(0);
1693
0
                    }
1694
0
                }
1695
0
            }
1696
0
        }
1697
0
    }
1698
1699
0
#ifdef LIBXML_ICONV_ENABLED
1700
0
    ret = xmlCreateIconvHandler(name, out);
1701
0
    if (*out != NULL)
1702
0
        return(0);
1703
0
    if (ret != XML_ERR_UNSUPPORTED_ENCODING)
1704
0
        return(ret);
1705
0
#endif /* LIBXML_ICONV_ENABLED */
1706
1707
#ifdef LIBXML_ICU_ENABLED
1708
    ret = xmlCreateUconvHandler(name, out);
1709
    if (*out != NULL)
1710
        return(0);
1711
    if (ret != XML_ERR_UNSUPPORTED_ENCODING)
1712
        return(ret);
1713
#endif /* LIBXML_ICU_ENABLED */
1714
1715
0
    return(XML_ERR_UNSUPPORTED_ENCODING);
1716
0
}
1717
1718
/**
1719
 * xmlFindHandler:
1720
 * @name:  a string describing the char encoding.
1721
 * @output:  boolean, use handler for output
1722
 * @out:  pointer to resulting handler
1723
 *
1724
 * Search all handlers for an exact match.
1725
 *
1726
 * Returns 0 on success, 1 if no handler was found, -1 if a memory
1727
 * allocation failed.
1728
 */
1729
static int
1730
0
xmlFindHandler(const char *name, int output, xmlCharEncodingHandler **out) {
1731
0
    int i;
1732
1733
    /*
1734
     * Check for default handlers
1735
     */
1736
0
    for (i = 0; i < (int) NUM_DEFAULT_HANDLERS; i++) {
1737
0
        xmlCharEncodingHandler *handler;
1738
1739
0
        handler = (xmlCharEncodingHandler *) &defaultHandlers[i];
1740
1741
0
        if (xmlStrcasecmp((const xmlChar *) name,
1742
0
                          (const xmlChar *) handler->name) == 0) {
1743
0
            if (output) {
1744
0
                if (handler->output != NULL) {
1745
0
                    *out = handler;
1746
0
                    return(0);
1747
0
                }
1748
0
            } else {
1749
0
                if (handler->input != NULL) {
1750
0
                    *out = handler;
1751
0
                    return(0);
1752
0
                }
1753
0
            }
1754
0
        }
1755
0
    }
1756
1757
    /*
1758
     * Check for other handlers
1759
     */
1760
0
    return(xmlFindExtraHandler(name, output, out));
1761
0
}
1762
1763
/**
1764
 * xmlLookupCharEncodingHandler:
1765
 * @enc:  an xmlCharEncoding value.
1766
 * @out:  pointer to result
1767
 *
1768
 * Find or create a handler matching the encoding. If no default or
1769
 * registered handler could be found, try to create a handler using
1770
 * iconv or ICU if supported.
1771
 *
1772
 * The handler must be closed with xmlCharEncCloseFunc.
1773
 *
1774
 * Available since 2.13.0.
1775
 *
1776
 * Returns an xmlParserErrors error code.
1777
 */
1778
int
1779
xmlLookupCharEncodingHandler(xmlCharEncoding enc,
1780
0
                             xmlCharEncodingHandler **out) {
1781
0
    const char *name = NULL;
1782
0
    static const char *const ebcdicNames[] = {
1783
0
        "EBCDIC", "ebcdic", "EBCDIC-US", "IBM-037"
1784
0
    };
1785
0
    static const char *const ucs4Names[] = {
1786
0
        "ISO-10646-UCS-4", "UCS-4", "UCS4"
1787
0
    };
1788
0
    static const char *const ucs2Names[] = {
1789
0
        "ISO-10646-UCS-2", "UCS-2", "UCS2"
1790
0
    };
1791
0
    static const char *const shiftJisNames[] = {
1792
0
        "SHIFT-JIS", "SHIFT_JIS", "Shift_JIS",
1793
0
    };
1794
0
    const char *const *names = NULL;
1795
0
    int numNames = 0;
1796
0
    int ret;
1797
0
    int i;
1798
1799
0
    if (out == NULL)
1800
0
        return(XML_ERR_ARGUMENT);
1801
0
    *out = NULL;
1802
1803
0
    switch (enc) {
1804
0
        case XML_CHAR_ENCODING_ERROR:
1805
0
      return(XML_ERR_UNSUPPORTED_ENCODING);
1806
0
        case XML_CHAR_ENCODING_NONE:
1807
0
      return(0);
1808
0
        case XML_CHAR_ENCODING_UTF8:
1809
0
      return(0);
1810
0
        case XML_CHAR_ENCODING_UTF16LE:
1811
0
      *out = (xmlCharEncodingHandler *) xmlUTF16LEHandler;
1812
0
            return(0);
1813
0
        case XML_CHAR_ENCODING_UTF16BE:
1814
0
      *out = (xmlCharEncodingHandler *) xmlUTF16BEHandler;
1815
0
            return(0);
1816
0
        case XML_CHAR_ENCODING_EBCDIC:
1817
0
            names = ebcdicNames;
1818
0
            numNames = sizeof(ebcdicNames) / sizeof(ebcdicNames[0]);
1819
0
      break;
1820
0
        case XML_CHAR_ENCODING_UCS4BE:
1821
0
        case XML_CHAR_ENCODING_UCS4LE:
1822
0
            names = ucs4Names;
1823
0
            numNames = sizeof(ucs4Names) / sizeof(ucs4Names[0]);
1824
0
      break;
1825
0
        case XML_CHAR_ENCODING_UCS4_2143:
1826
0
      break;
1827
0
        case XML_CHAR_ENCODING_UCS4_3412:
1828
0
      break;
1829
0
        case XML_CHAR_ENCODING_UCS2:
1830
0
            names = ucs2Names;
1831
0
            numNames = sizeof(ucs2Names) / sizeof(ucs2Names[0]);
1832
0
      break;
1833
1834
0
        case XML_CHAR_ENCODING_ASCII:
1835
0
      *out = (xmlCharEncodingHandler *) xmlAsciiHandler;
1836
0
            return(0);
1837
0
        case XML_CHAR_ENCODING_8859_1:
1838
0
      *out = (xmlCharEncodingHandler *) xmlLatin1Handler;
1839
0
            return(0);
1840
0
        case XML_CHAR_ENCODING_8859_2:
1841
0
      name = "ISO-8859-2";
1842
0
      break;
1843
0
        case XML_CHAR_ENCODING_8859_3:
1844
0
      name = "ISO-8859-3";
1845
0
      break;
1846
0
        case XML_CHAR_ENCODING_8859_4:
1847
0
      name = "ISO-8859-4";
1848
0
      break;
1849
0
        case XML_CHAR_ENCODING_8859_5:
1850
0
      name = "ISO-8859-5";
1851
0
      break;
1852
0
        case XML_CHAR_ENCODING_8859_6:
1853
0
      name = "ISO-8859-6";
1854
0
      break;
1855
0
        case XML_CHAR_ENCODING_8859_7:
1856
0
      name = "ISO-8859-7";
1857
0
      break;
1858
0
        case XML_CHAR_ENCODING_8859_8:
1859
0
      name = "ISO-8859-8";
1860
0
      break;
1861
0
        case XML_CHAR_ENCODING_8859_9:
1862
0
      name = "ISO-8859-9";
1863
0
      break;
1864
1865
0
        case XML_CHAR_ENCODING_2022_JP:
1866
0
            name = "ISO-2022-JP";
1867
0
      break;
1868
0
        case XML_CHAR_ENCODING_SHIFT_JIS:
1869
0
            names = shiftJisNames;
1870
0
            numNames = sizeof(shiftJisNames) / sizeof(shiftJisNames[0]);
1871
0
      break;
1872
0
        case XML_CHAR_ENCODING_EUC_JP:
1873
0
            name = "EUC-JP";
1874
0
      break;
1875
0
  default:
1876
0
      break;
1877
0
    }
1878
1879
0
    if (name != NULL)
1880
0
        return(xmlFindExtraHandler(name, 0, out));
1881
1882
0
    if (names != NULL) {
1883
0
        for (i = 0; i < numNames; i++) {
1884
0
            ret = xmlFindExtraHandler(names[i], 0, out);
1885
0
            if (*out != NULL)
1886
0
                return(0);
1887
0
            if (ret != XML_ERR_UNSUPPORTED_ENCODING)
1888
0
                return(ret);
1889
0
        }
1890
0
    }
1891
1892
0
    return(XML_ERR_UNSUPPORTED_ENCODING);
1893
0
}
1894
1895
/**
1896
 * xmlGetCharEncodingHandler:
1897
 * @enc:  an xmlCharEncoding value.
1898
 *
1899
 * DEPRECATED: Use xmlLookupCharEncodingHandler which has better error
1900
 * reporting.
1901
 *
1902
 * Returns the handler or NULL if no handler was found or an error
1903
 * occurred.
1904
 */
1905
xmlCharEncodingHandlerPtr
1906
0
xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1907
0
    xmlCharEncodingHandler *ret;
1908
1909
0
    xmlLookupCharEncodingHandler(enc, &ret);
1910
0
    return(ret);
1911
0
}
1912
1913
/**
1914
 * xmlOpenCharEncodingHandler:
1915
 * @name:  a string describing the char encoding.
1916
 * @output:  boolean, use handler for output
1917
 * @out:  pointer to result
1918
 *
1919
 * Find or create a handler matching the encoding. If no default or
1920
 * registered handler could be found, try to create a handler using
1921
 * iconv or ICU if supported.
1922
 *
1923
 * The handler must be closed with xmlCharEncCloseFunc.
1924
 *
1925
 * Available since 2.13.0.
1926
 *
1927
 * Returns an xmlParserErrors error code.
1928
 */
1929
int
1930
xmlOpenCharEncodingHandler(const char *name, int output,
1931
0
                           xmlCharEncodingHandler **out) {
1932
0
    const char *nalias;
1933
0
    const char *norig;
1934
0
    xmlCharEncoding enc;
1935
0
    int ret;
1936
1937
0
    if (out == NULL)
1938
0
        return(XML_ERR_ARGUMENT);
1939
0
    *out = NULL;
1940
1941
0
    if (name == NULL)
1942
0
        return(XML_ERR_ARGUMENT);
1943
1944
    /*
1945
     * Do the alias resolution
1946
     */
1947
0
    norig = name;
1948
0
    nalias = xmlGetEncodingAlias(name);
1949
0
    if (nalias != NULL)
1950
0
  name = nalias;
1951
1952
0
    ret = xmlFindHandler(name, output, out);
1953
0
    if (*out != NULL)
1954
0
        return(0);
1955
0
    if (ret != XML_ERR_UNSUPPORTED_ENCODING)
1956
0
        return(ret);
1957
1958
    /*
1959
     * Fallback using the canonical names
1960
     */
1961
0
    enc = xmlParseCharEncoding(norig);
1962
0
    return(xmlLookupCharEncodingHandler(enc, out));
1963
0
}
1964
1965
/**
1966
 * xmlFindCharEncodingHandler:
1967
 * @name:  a string describing the char encoding.
1968
 *
1969
 * DEPRECATED: Use xmlOpenCharEncodingHandler which has better error
1970
 * reporting.
1971
 *
1972
 * Returns the handler or NULL if no handler was found or an error
1973
 * occurred.
1974
 */
1975
xmlCharEncodingHandlerPtr
1976
0
xmlFindCharEncodingHandler(const char *name) {
1977
0
    xmlCharEncodingHandler *ret;
1978
1979
0
    xmlOpenCharEncodingHandler(name, 0, &ret);
1980
0
    return(ret);
1981
0
}
1982
1983
/************************************************************************
1984
 *                  *
1985
 *    ICONV based generic conversion functions    *
1986
 *                  *
1987
 ************************************************************************/
1988
1989
#ifdef LIBXML_ICONV_ENABLED
1990
/**
1991
 * xmlIconvWrapper:
1992
 * @cd:   iconv converter data structure
1993
 * @out:  a pointer to an array of bytes to store the result
1994
 * @outlen:  the length of @out
1995
 * @in:  a pointer to an array of input bytes
1996
 * @inlen:  the length of @in
1997
 *
1998
 * Returns an XML_ENC_ERR code.
1999
 *
2000
 * The value of @inlen after return is the number of octets consumed
2001
 *     as the return value is positive, else unpredictable.
2002
 * The value of @outlen after return is the number of octets produced.
2003
 */
2004
static int
2005
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
2006
0
                const unsigned char *in, int *inlen) {
2007
0
    size_t icv_inlen, icv_outlen;
2008
0
    const char *icv_in = (const char *) in;
2009
0
    char *icv_out = (char *) out;
2010
0
    size_t ret;
2011
2012
0
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
2013
0
        if (outlen != NULL) *outlen = 0;
2014
0
        return(XML_ENC_ERR_INTERNAL);
2015
0
    }
2016
0
    icv_inlen = *inlen;
2017
0
    icv_outlen = *outlen;
2018
    /*
2019
     * Some versions take const, other versions take non-const input.
2020
     */
2021
0
    ret = iconv(cd, (void *) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
2022
0
    *inlen -= icv_inlen;
2023
0
    *outlen -= icv_outlen;
2024
0
    if (ret == (size_t) -1) {
2025
0
        if (errno == EILSEQ)
2026
0
            return(XML_ENC_ERR_INPUT);
2027
0
        if (errno == E2BIG)
2028
0
            return(XML_ENC_ERR_SPACE);
2029
0
        if (errno == EINVAL)
2030
0
            return(XML_ENC_ERR_PARTIAL);
2031
0
        return(XML_ENC_ERR_INTERNAL);
2032
0
    }
2033
0
    return(XML_ENC_ERR_SUCCESS);
2034
0
}
2035
#endif /* LIBXML_ICONV_ENABLED */
2036
2037
/************************************************************************
2038
 *                  *
2039
 *    ICU based generic conversion functions    *
2040
 *                  *
2041
 ************************************************************************/
2042
2043
#ifdef LIBXML_ICU_ENABLED
2044
/**
2045
 * xmlUconvWrapper:
2046
 * @cd: ICU uconverter data structure
2047
 * @toUnicode : non-zero if toUnicode. 0 otherwise.
2048
 * @out:  a pointer to an array of bytes to store the result
2049
 * @outlen:  the length of @out
2050
 * @in:  a pointer to an array of input bytes
2051
 * @inlen:  the length of @in
2052
 *
2053
 * Returns an XML_ENC_ERR code.
2054
 *
2055
 * The value of @inlen after return is the number of octets consumed
2056
 *     as the return value is positive, else unpredictable.
2057
 * The value of @outlen after return is the number of octets produced.
2058
 */
2059
static int
2060
xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
2061
                const unsigned char *in, int *inlen) {
2062
    const char *ucv_in = (const char *) in;
2063
    char *ucv_out = (char *) out;
2064
    UErrorCode err = U_ZERO_ERROR;
2065
2066
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
2067
        if (outlen != NULL) *outlen = 0;
2068
        return(XML_ENC_ERR_INTERNAL);
2069
    }
2070
2071
    /*
2072
     * Note that the ICU API is stateful. It can always consume a certain
2073
     * amount of input even if the output buffer would overflow. The
2074
     * remaining input must be processed by calling ucnv_convertEx with a
2075
     * possibly empty input buffer.
2076
     *
2077
     * ucnv_convertEx is always called with reset and flush set to 0,
2078
     * so we don't mess up the state. This should never generate
2079
     * U_TRUNCATED_CHAR_FOUND errors.
2080
     *
2081
     * This also means that ICU xmlCharEncodingHandlers should never be
2082
     * reused. It would be a lot nicer if there was a way to emulate the
2083
     * stateless iconv API.
2084
     */
2085
    if (toUnicode) {
2086
        /* encoding => UTF-16 => UTF-8 */
2087
        ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
2088
                       &ucv_in, ucv_in + *inlen, cd->pivot_buf,
2089
                       &cd->pivot_source, &cd->pivot_target,
2090
                       cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, 0, &err);
2091
    } else {
2092
        /* UTF-8 => UTF-16 => encoding */
2093
        ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
2094
                       &ucv_in, ucv_in + *inlen, cd->pivot_buf,
2095
                       &cd->pivot_source, &cd->pivot_target,
2096
                       cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, 0, &err);
2097
    }
2098
    *inlen = ucv_in - (const char*) in;
2099
    *outlen = ucv_out - (char *) out;
2100
    if (U_SUCCESS(err)) {
2101
        return(XML_ENC_ERR_SUCCESS);
2102
    }
2103
    if (err == U_BUFFER_OVERFLOW_ERROR)
2104
        return(XML_ENC_ERR_SPACE);
2105
    if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
2106
        return(XML_ENC_ERR_INPUT);
2107
    return(XML_ENC_ERR_PARTIAL);
2108
}
2109
#endif /* LIBXML_ICU_ENABLED */
2110
2111
/************************************************************************
2112
 *                  *
2113
 *    The real API used by libxml for on-the-fly conversion *
2114
 *                  *
2115
 ************************************************************************/
2116
2117
/**
2118
 * xmlEncConvertError:
2119
 * @code:  XML_ENC_ERR code
2120
 *
2121
 * Convert XML_ENC_ERR to libxml2 error codes.
2122
 */
2123
static int
2124
0
xmlEncConvertError(int code) {
2125
0
    int ret;
2126
2127
0
    switch (code) {
2128
0
        case XML_ENC_ERR_SUCCESS:
2129
0
            ret = XML_ERR_OK;
2130
0
            break;
2131
0
        case XML_ENC_ERR_INPUT:
2132
0
            ret = XML_ERR_INVALID_ENCODING;
2133
0
            break;
2134
0
        case XML_ENC_ERR_MEMORY:
2135
0
            ret = XML_ERR_NO_MEMORY;
2136
0
            break;
2137
0
        default:
2138
0
            ret = XML_ERR_INTERNAL_ERROR;
2139
0
            break;
2140
0
    }
2141
2142
0
    return(ret);
2143
0
}
2144
2145
/**
2146
 * xmlEncInputChunk:
2147
 * @handler:  encoding handler
2148
 * @out:  a pointer to an array of bytes to store the result
2149
 * @outlen:  the length of @out
2150
 * @in:  a pointer to an array of input bytes
2151
 * @inlen:  the length of @in
2152
 *
2153
 * The value of @inlen after return is the number of octets consumed
2154
 *     as the return value is 0, else unpredictable.
2155
 * The value of @outlen after return is the number of octets produced.
2156
 *
2157
 * Returns an XML_ENC_ERR code.
2158
 */
2159
int
2160
xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2161
0
                 int *outlen, const unsigned char *in, int *inlen) {
2162
0
    int ret;
2163
2164
0
    if (handler->input != NULL) {
2165
0
        int oldinlen = *inlen;
2166
2167
0
        ret = handler->input(out, outlen, in, inlen);
2168
0
        if (ret >= 0) {
2169
            /*
2170
             * The built-in converters don't signal XML_ENC_ERR_SPACE.
2171
             */
2172
0
            if (*inlen < oldinlen) {
2173
0
                if (*outlen > 0)
2174
0
                    ret = XML_ENC_ERR_SPACE;
2175
0
                else
2176
0
                    ret = XML_ENC_ERR_PARTIAL;
2177
0
            } else {
2178
0
                ret = XML_ENC_ERR_SUCCESS;
2179
0
            }
2180
0
        }
2181
0
    }
2182
0
#ifdef LIBXML_ICONV_ENABLED
2183
0
    else if (handler->iconv_in != NULL) {
2184
0
        ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
2185
0
    }
2186
0
#endif /* LIBXML_ICONV_ENABLED */
2187
#ifdef LIBXML_ICU_ENABLED
2188
    else if (handler->uconv_in != NULL) {
2189
        ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen);
2190
    }
2191
#endif /* LIBXML_ICU_ENABLED */
2192
0
    else {
2193
0
        *outlen = 0;
2194
0
        *inlen = 0;
2195
0
        ret = XML_ENC_ERR_INTERNAL;
2196
0
    }
2197
2198
    /* Ignore partial errors when reading. */
2199
0
    if (ret == XML_ENC_ERR_PARTIAL)
2200
0
        ret = XML_ENC_ERR_SUCCESS;
2201
2202
0
    return(ret);
2203
0
}
2204
2205
/**
2206
 * xmlEncOutputChunk:
2207
 * @handler:  encoding handler
2208
 * @out:  a pointer to an array of bytes to store the result
2209
 * @outlen:  the length of @out
2210
 * @in:  a pointer to an array of input bytes
2211
 * @inlen:  the length of @in
2212
 *
2213
 * Returns an XML_ENC_ERR code.
2214
 *
2215
 * The value of @inlen after return is the number of octets consumed
2216
 *     as the return value is 0, else unpredictable.
2217
 * The value of @outlen after return is the number of octets produced.
2218
 */
2219
static int
2220
xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2221
0
                  int *outlen, const unsigned char *in, int *inlen) {
2222
0
    int ret;
2223
2224
0
    if (handler->output != NULL) {
2225
0
        int oldinlen = *inlen;
2226
2227
0
        ret = handler->output(out, outlen, in, inlen);
2228
0
        if (ret >= 0) {
2229
            /*
2230
             * The built-in converters don't signal XML_ENC_ERR_SPACE.
2231
             */
2232
0
            if (*inlen < oldinlen) {
2233
0
                if (*outlen > 0)
2234
0
                    ret = XML_ENC_ERR_SPACE;
2235
0
                else
2236
0
                    ret = XML_ENC_ERR_PARTIAL;
2237
0
            } else {
2238
0
                ret = XML_ENC_ERR_SUCCESS;
2239
0
            }
2240
0
        }
2241
0
    }
2242
0
#ifdef LIBXML_ICONV_ENABLED
2243
0
    else if (handler->iconv_out != NULL) {
2244
0
        ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
2245
0
    }
2246
0
#endif /* LIBXML_ICONV_ENABLED */
2247
#ifdef LIBXML_ICU_ENABLED
2248
    else if (handler->uconv_out != NULL) {
2249
        ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen);
2250
    }
2251
#endif /* LIBXML_ICU_ENABLED */
2252
0
    else {
2253
0
        *outlen = 0;
2254
0
        *inlen = 0;
2255
0
        ret = XML_ENC_ERR_INTERNAL;
2256
0
    }
2257
2258
    /* We shouldn't generate partial sequences when writing. */
2259
0
    if (ret == XML_ENC_ERR_PARTIAL)
2260
0
        ret = XML_ENC_ERR_INTERNAL;
2261
2262
0
    return(ret);
2263
0
}
2264
2265
/**
2266
 * xmlCharEncFirstLine:
2267
 * @handler:   char encoding transformation data structure
2268
 * @out:  an xmlBuffer for the output.
2269
 * @in:  an xmlBuffer for the input
2270
 *
2271
 * DEPERECATED: Don't use.
2272
 *
2273
 * Returns the number of bytes written or an XML_ENC_ERR code.
2274
 */
2275
int
2276
xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2277
0
                    xmlBufferPtr in) {
2278
0
    return(xmlCharEncInFunc(handler, out, in));
2279
0
}
2280
2281
/**
2282
 * xmlCharEncInput:
2283
 * @input: a parser input buffer
2284
 *
2285
 * Generic front-end for the encoding handler on parser input
2286
 *
2287
 * Returns the number of bytes written or an XML_ENC_ERR code.
2288
 */
2289
int
2290
xmlCharEncInput(xmlParserInputBufferPtr input)
2291
0
{
2292
0
    int ret;
2293
0
    size_t avail;
2294
0
    size_t toconv;
2295
0
    int c_in;
2296
0
    int c_out;
2297
0
    xmlBufPtr in;
2298
0
    xmlBufPtr out;
2299
0
    const xmlChar *inData;
2300
0
    size_t inTotal = 0;
2301
2302
0
    if ((input == NULL) || (input->encoder == NULL) ||
2303
0
        (input->buffer == NULL) || (input->raw == NULL))
2304
0
        return(XML_ENC_ERR_INTERNAL);
2305
0
    out = input->buffer;
2306
0
    in = input->raw;
2307
2308
0
    toconv = xmlBufUse(in);
2309
0
    if (toconv == 0)
2310
0
        return (0);
2311
0
    inData = xmlBufContent(in);
2312
0
    inTotal = 0;
2313
2314
0
    do {
2315
0
        c_in = toconv > INT_MAX / 2 ? INT_MAX / 2 : toconv;
2316
2317
0
        avail = xmlBufAvail(out);
2318
0
        if (avail > INT_MAX)
2319
0
            avail = INT_MAX;
2320
0
        if (avail < 4096) {
2321
0
            if (xmlBufGrow(out, 4096) < 0) {
2322
0
                input->error = XML_ERR_NO_MEMORY;
2323
0
                return(XML_ENC_ERR_MEMORY);
2324
0
            }
2325
0
            avail = xmlBufAvail(out);
2326
0
        }
2327
2328
0
        c_in = toconv;
2329
0
        c_out = avail;
2330
0
        ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2331
0
                               inData, &c_in);
2332
0
        inTotal += c_in;
2333
0
        inData += c_in;
2334
0
        toconv -= c_in;
2335
0
        xmlBufAddLen(out, c_out);
2336
0
    } while (ret == XML_ENC_ERR_SPACE);
2337
2338
0
    xmlBufShrink(in, inTotal);
2339
2340
0
    if (input->rawconsumed > ULONG_MAX - (unsigned long)c_in)
2341
0
        input->rawconsumed = ULONG_MAX;
2342
0
    else
2343
0
        input->rawconsumed += c_in;
2344
2345
0
    if (((ret != 0) && (c_out == 0)) ||
2346
0
        (ret == XML_ENC_ERR_MEMORY)) {
2347
0
        if (input->error == 0)
2348
0
            input->error = xmlEncConvertError(ret);
2349
0
        return(ret);
2350
0
    }
2351
2352
0
    return (c_out);
2353
0
}
2354
2355
/**
2356
 * xmlCharEncInFunc:
2357
 * @handler:  char encoding transformation data structure
2358
 * @out:  an xmlBuffer for the output.
2359
 * @in:  an xmlBuffer for the input
2360
 *
2361
 * Generic front-end for the encoding handler input function
2362
 *
2363
 * Returns the number of bytes written or an XML_ENC_ERR code.
2364
 */
2365
int
2366
xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2367
                 xmlBufferPtr in)
2368
0
{
2369
0
    int ret;
2370
0
    int written;
2371
0
    int toconv;
2372
2373
0
    if (handler == NULL)
2374
0
        return(XML_ENC_ERR_INTERNAL);
2375
0
    if (out == NULL)
2376
0
        return(XML_ENC_ERR_INTERNAL);
2377
0
    if (in == NULL)
2378
0
        return(XML_ENC_ERR_INTERNAL);
2379
2380
0
    toconv = in->use;
2381
0
    if (toconv == 0)
2382
0
        return (0);
2383
0
    written = out->size - out->use -1; /* count '\0' */
2384
0
    if (toconv * 2 >= written) {
2385
0
        xmlBufferGrow(out, out->size + toconv * 2);
2386
0
        written = out->size - out->use - 1;
2387
0
    }
2388
0
    ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2389
0
                           in->content, &toconv);
2390
0
    xmlBufferShrink(in, toconv);
2391
0
    out->use += written;
2392
0
    out->content[out->use] = 0;
2393
2394
0
    return (written? written : ret);
2395
0
}
2396
2397
#ifdef LIBXML_OUTPUT_ENABLED
2398
/**
2399
 * xmlCharEncOutput:
2400
 * @output: a parser output buffer
2401
 * @init: is this an initialization call without data
2402
 *
2403
 * Generic front-end for the encoding handler on parser output
2404
 * a first call with @init == 1 has to be made first to initiate the
2405
 * output in case of non-stateless encoding needing to initiate their
2406
 * state or the output (like the BOM in UTF16).
2407
 * In case of UTF8 sequence conversion errors for the given encoder,
2408
 * the content will be automatically remapped to a CharRef sequence.
2409
 *
2410
 * Returns the number of bytes written or an XML_ENC_ERR code.
2411
 */
2412
int
2413
xmlCharEncOutput(xmlOutputBufferPtr output, int init)
2414
0
{
2415
0
    int ret;
2416
0
    size_t written;
2417
0
    int writtentot = 0;
2418
0
    size_t toconv;
2419
0
    int c_in;
2420
0
    int c_out;
2421
0
    xmlBufPtr in;
2422
0
    xmlBufPtr out;
2423
2424
0
    if ((output == NULL) || (output->encoder == NULL) ||
2425
0
        (output->buffer == NULL) || (output->conv == NULL))
2426
0
        return(XML_ENC_ERR_INTERNAL);
2427
0
    out = output->conv;
2428
0
    in = output->buffer;
2429
2430
0
retry:
2431
2432
0
    written = xmlBufAvail(out);
2433
2434
    /*
2435
     * First specific handling of the initialization call
2436
     */
2437
0
    if (init) {
2438
0
        c_in = 0;
2439
0
        c_out = written;
2440
        /* TODO: Check return value. */
2441
0
        xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2442
0
                          NULL, &c_in);
2443
0
        xmlBufAddLen(out, c_out);
2444
0
        return(c_out);
2445
0
    }
2446
2447
    /*
2448
     * Conversion itself.
2449
     */
2450
0
    toconv = xmlBufUse(in);
2451
0
    if (toconv > 64 * 1024)
2452
0
        toconv = 64 * 1024;
2453
0
    if (toconv * 4 >= written) {
2454
0
        if (xmlBufGrow(out, toconv * 4) < 0) {
2455
0
            ret = XML_ENC_ERR_MEMORY;
2456
0
            goto error;
2457
0
        }
2458
0
        written = xmlBufAvail(out);
2459
0
    }
2460
0
    if (written > 256 * 1024)
2461
0
        written = 256 * 1024;
2462
2463
0
    c_in = toconv;
2464
0
    c_out = written;
2465
0
    ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2466
0
                            xmlBufContent(in), &c_in);
2467
0
    xmlBufShrink(in, c_in);
2468
0
    xmlBufAddLen(out, c_out);
2469
0
    writtentot += c_out;
2470
2471
0
    if (ret == XML_ENC_ERR_SPACE)
2472
0
        goto retry;
2473
2474
    /*
2475
     * Attempt to handle error cases
2476
     */
2477
0
    if (ret == XML_ENC_ERR_INPUT) {
2478
0
        xmlChar charref[20];
2479
0
        int len = xmlBufUse(in);
2480
0
        xmlChar *content = xmlBufContent(in);
2481
0
        int cur, charrefLen;
2482
2483
0
        cur = xmlGetUTF8Char(content, &len);
2484
0
        if (cur <= 0)
2485
0
            goto error;
2486
2487
        /*
2488
         * Removes the UTF8 sequence, and replace it by a charref
2489
         * and continue the transcoding phase, hoping the error
2490
         * did not mangle the encoder state.
2491
         */
2492
0
        charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2493
0
                         "&#%d;", cur);
2494
0
        xmlBufGrow(out, charrefLen * 4);
2495
0
        c_out = xmlBufAvail(out);
2496
0
        c_in = charrefLen;
2497
0
        ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2498
0
                                charref, &c_in);
2499
0
        if ((ret < 0) || (c_in != charrefLen)) {
2500
0
            ret = XML_ENC_ERR_INTERNAL;
2501
0
            goto error;
2502
0
        }
2503
2504
0
        xmlBufShrink(in, len);
2505
0
        xmlBufAddLen(out, c_out);
2506
0
        writtentot += c_out;
2507
0
        goto retry;
2508
0
    }
2509
2510
0
error:
2511
0
    if (((writtentot <= 0) && (ret != 0)) ||
2512
0
        (ret == XML_ENC_ERR_MEMORY)) {
2513
0
        if (output->error == 0)
2514
0
            output->error = xmlEncConvertError(ret);
2515
0
        return(ret);
2516
0
    }
2517
2518
0
    return(writtentot);
2519
0
}
2520
#endif
2521
2522
/**
2523
 * xmlCharEncOutFunc:
2524
 * @handler:  char encoding transformation data structure
2525
 * @out:  an xmlBuffer for the output.
2526
 * @in:  an xmlBuffer for the input
2527
 *
2528
 * Generic front-end for the encoding handler output function
2529
 * a first call with @in == NULL has to be made firs to initiate the
2530
 * output in case of non-stateless encoding needing to initiate their
2531
 * state or the output (like the BOM in UTF16).
2532
 * In case of UTF8 sequence conversion errors for the given encoder,
2533
 * the content will be automatically remapped to a CharRef sequence.
2534
 *
2535
 * Returns the number of bytes written or an XML_ENC_ERR code.
2536
 */
2537
int
2538
xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2539
0
                  xmlBufferPtr in) {
2540
0
    int ret;
2541
0
    int written;
2542
0
    int writtentot = 0;
2543
0
    int toconv;
2544
2545
0
    if (handler == NULL) return(XML_ENC_ERR_INTERNAL);
2546
0
    if (out == NULL) return(XML_ENC_ERR_INTERNAL);
2547
2548
0
retry:
2549
2550
0
    written = out->size - out->use;
2551
2552
0
    if (written > 0)
2553
0
  written--; /* Gennady: count '/0' */
2554
2555
    /*
2556
     * First specific handling of in = NULL, i.e. the initialization call
2557
     */
2558
0
    if (in == NULL) {
2559
0
        toconv = 0;
2560
        /* TODO: Check return value. */
2561
0
        xmlEncOutputChunk(handler, &out->content[out->use], &written,
2562
0
                          NULL, &toconv);
2563
0
        out->use += written;
2564
0
        out->content[out->use] = 0;
2565
0
        return(0);
2566
0
    }
2567
2568
    /*
2569
     * Conversion itself.
2570
     */
2571
0
    toconv = in->use;
2572
0
    if (toconv * 4 >= written) {
2573
0
        xmlBufferGrow(out, toconv * 4);
2574
0
  written = out->size - out->use - 1;
2575
0
    }
2576
0
    ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2577
0
                            in->content, &toconv);
2578
0
    xmlBufferShrink(in, toconv);
2579
0
    out->use += written;
2580
0
    writtentot += written;
2581
0
    out->content[out->use] = 0;
2582
2583
0
    if (ret == XML_ENC_ERR_SPACE)
2584
0
        goto retry;
2585
2586
    /*
2587
     * Attempt to handle error cases
2588
     */
2589
0
    if (ret == XML_ENC_ERR_INPUT) {
2590
0
        xmlChar charref[20];
2591
0
        int len = in->use;
2592
0
        const xmlChar *utf = (const xmlChar *) in->content;
2593
0
        int cur, charrefLen;
2594
2595
0
        cur = xmlGetUTF8Char(utf, &len);
2596
0
        if (cur <= 0)
2597
0
            return(ret);
2598
2599
        /*
2600
         * Removes the UTF8 sequence, and replace it by a charref
2601
         * and continue the transcoding phase, hoping the error
2602
         * did not mangle the encoder state.
2603
         */
2604
0
        charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2605
0
                         "&#%d;", cur);
2606
0
        xmlBufferShrink(in, len);
2607
0
        xmlBufferGrow(out, charrefLen * 4);
2608
0
        written = out->size - out->use - 1;
2609
0
        toconv = charrefLen;
2610
0
        ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2611
0
                                charref, &toconv);
2612
0
        if ((ret < 0) || (toconv != charrefLen))
2613
0
            return(XML_ENC_ERR_INTERNAL);
2614
2615
0
        out->use += written;
2616
0
        writtentot += written;
2617
0
        out->content[out->use] = 0;
2618
0
        goto retry;
2619
0
    }
2620
0
    return(writtentot ? writtentot : ret);
2621
0
}
2622
2623
/**
2624
 * xmlCharEncCloseFunc:
2625
 * @handler:  char encoding transformation data structure
2626
 *
2627
 * Generic front-end for encoding handler close function
2628
 *
2629
 * Returns 0 if success, or -1 in case of error
2630
 */
2631
int
2632
0
xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2633
0
    int ret = 0;
2634
0
    int tofree = 0;
2635
0
    int i = 0;
2636
2637
0
    if (handler == NULL) return(-1);
2638
2639
0
    for (i = 0; i < (int) NUM_DEFAULT_HANDLERS; i++) {
2640
0
        if (handler == &defaultHandlers[i])
2641
0
            return(0);
2642
0
    }
2643
2644
0
    if (handlers != NULL) {
2645
0
        for (i = 0;i < nbCharEncodingHandler; i++) {
2646
0
            if (handler == handlers[i])
2647
0
                return(0);
2648
0
  }
2649
0
    }
2650
0
#ifdef LIBXML_ICONV_ENABLED
2651
    /*
2652
     * Iconv handlers can be used only once, free the whole block.
2653
     * and the associated icon resources.
2654
     */
2655
0
    if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
2656
0
        tofree = 1;
2657
0
  if (handler->iconv_out != NULL) {
2658
0
      if (iconv_close(handler->iconv_out))
2659
0
    ret = -1;
2660
0
      handler->iconv_out = NULL;
2661
0
  }
2662
0
  if (handler->iconv_in != NULL) {
2663
0
      if (iconv_close(handler->iconv_in))
2664
0
    ret = -1;
2665
0
      handler->iconv_in = NULL;
2666
0
  }
2667
0
    }
2668
0
#endif /* LIBXML_ICONV_ENABLED */
2669
#ifdef LIBXML_ICU_ENABLED
2670
    if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) {
2671
        tofree = 1;
2672
  if (handler->uconv_out != NULL) {
2673
      closeIcuConverter(handler->uconv_out);
2674
      handler->uconv_out = NULL;
2675
  }
2676
  if (handler->uconv_in != NULL) {
2677
      closeIcuConverter(handler->uconv_in);
2678
      handler->uconv_in = NULL;
2679
  }
2680
    }
2681
#endif
2682
0
    if (tofree) {
2683
        /* free up only dynamic handlers iconv/uconv */
2684
0
        if (handler->name != NULL)
2685
0
            xmlFree(handler->name);
2686
0
        handler->name = NULL;
2687
0
        xmlFree(handler);
2688
0
    }
2689
2690
0
    return(ret);
2691
0
}
2692
2693
/**
2694
 * xmlByteConsumed:
2695
 * @ctxt: an XML parser context
2696
 *
2697
 * This function provides the current index of the parser relative
2698
 * to the start of the current entity. This function is computed in
2699
 * bytes from the beginning starting at zero and finishing at the
2700
 * size in byte of the file if parsing a file. The function is
2701
 * of constant cost if the input is UTF-8 but can be costly if run
2702
 * on non-UTF-8 input.
2703
 *
2704
 * Returns the index in bytes from the beginning of the entity or -1
2705
 *         in case the index could not be computed.
2706
 */
2707
long
2708
0
xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2709
0
    xmlParserInputPtr in;
2710
2711
0
    if (ctxt == NULL) return(-1);
2712
0
    in = ctxt->input;
2713
0
    if (in == NULL)  return(-1);
2714
0
    if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2715
0
        unsigned int unused = 0;
2716
0
  xmlCharEncodingHandler * handler = in->buf->encoder;
2717
        /*
2718
   * Encoding conversion, compute the number of unused original
2719
   * bytes from the input not consumed and subtract that from
2720
   * the raw consumed value, this is not a cheap operation
2721
   */
2722
0
        if (in->end - in->cur > 0) {
2723
0
      unsigned char convbuf[32000];
2724
0
      const unsigned char *cur = (const unsigned char *)in->cur;
2725
0
      int toconv = in->end - in->cur, written = 32000;
2726
2727
0
      int ret;
2728
2729
0
            do {
2730
0
                toconv = in->end - cur;
2731
0
                written = 32000;
2732
0
                ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2733
0
                                        cur, &toconv);
2734
0
                if ((ret != XML_ENC_ERR_SUCCESS) && (ret != XML_ENC_ERR_SPACE))
2735
0
                    return(-1);
2736
0
                unused += written;
2737
0
                cur += toconv;
2738
0
            } while (ret == XML_ENC_ERR_SPACE);
2739
0
  }
2740
0
  if (in->buf->rawconsumed < unused)
2741
0
      return(-1);
2742
0
  return(in->buf->rawconsumed - unused);
2743
0
    }
2744
0
    return(in->consumed + (in->cur - in->base));
2745
0
}
2746
2747
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2748
#ifdef LIBXML_ISO8859X_ENABLED
2749
2750
/**
2751
 * UTF8ToISO8859x:
2752
 * @out:  a pointer to an array of bytes to store the result
2753
 * @outlen:  the length of @out
2754
 * @in:  a pointer to an array of UTF-8 chars
2755
 * @inlen:  the length of @in
2756
 * @xlattable: the 2-level transcoding table
2757
 *
2758
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
2759
 * block of chars out.
2760
 *
2761
 * Returns the number of bytes written or an XML_ENC_ERR code.
2762
 *
2763
 * The value of @inlen after return is the number of octets consumed
2764
 * as the return value is positive, else unpredictable.
2765
 * The value of @outlen after return is the number of octets consumed.
2766
 */
2767
static int
2768
UTF8ToISO8859x(unsigned char* out, int *outlen,
2769
              const unsigned char* in, int *inlen,
2770
              const unsigned char* const xlattable) {
2771
    const unsigned char* outstart = out;
2772
    const unsigned char* inend;
2773
    const unsigned char* instart = in;
2774
    const unsigned char* processed = in;
2775
2776
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
2777
        (xlattable == NULL))
2778
  return(XML_ENC_ERR_INTERNAL);
2779
    if (in == NULL) {
2780
        /*
2781
        * initialization nothing to do
2782
        */
2783
        *outlen = 0;
2784
        *inlen = 0;
2785
        return(0);
2786
    }
2787
    inend = in + (*inlen);
2788
    while (in < inend) {
2789
        unsigned char d = *in++;
2790
        if  (d < 0x80)  {
2791
            *out++ = d;
2792
        } else if (d < 0xC0) {
2793
            /* trailing byte in leading position */
2794
            *outlen = out - outstart;
2795
            *inlen = processed - instart;
2796
            return(XML_ENC_ERR_INPUT);
2797
        } else if (d < 0xE0) {
2798
            unsigned char c;
2799
            if (!(in < inend)) {
2800
                /* trailing byte not in input buffer */
2801
                *outlen = out - outstart;
2802
                *inlen = processed - instart;
2803
                return(XML_ENC_ERR_PARTIAL);
2804
            }
2805
            c = *in++;
2806
            if ((c & 0xC0) != 0x80) {
2807
                /* not a trailing byte */
2808
                *outlen = out - outstart;
2809
                *inlen = processed - instart;
2810
                return(XML_ENC_ERR_INPUT);
2811
            }
2812
            c = c & 0x3F;
2813
            d = d & 0x1F;
2814
            d = xlattable [48 + c + xlattable [d] * 64];
2815
            if (d == 0) {
2816
                /* not in character set */
2817
                *outlen = out - outstart;
2818
                *inlen = processed - instart;
2819
                return(XML_ENC_ERR_INPUT);
2820
            }
2821
            *out++ = d;
2822
        } else if (d < 0xF0) {
2823
            unsigned char c1;
2824
            unsigned char c2;
2825
            if (!(in < inend - 1)) {
2826
                /* trailing bytes not in input buffer */
2827
                *outlen = out - outstart;
2828
                *inlen = processed - instart;
2829
                return(XML_ENC_ERR_PARTIAL);
2830
            }
2831
            c1 = *in++;
2832
            if ((c1 & 0xC0) != 0x80) {
2833
                /* not a trailing byte (c1) */
2834
                *outlen = out - outstart;
2835
                *inlen = processed - instart;
2836
                return(XML_ENC_ERR_INPUT);
2837
            }
2838
            c2 = *in++;
2839
            if ((c2 & 0xC0) != 0x80) {
2840
                /* not a trailing byte (c2) */
2841
                *outlen = out - outstart;
2842
                *inlen = processed - instart;
2843
                return(XML_ENC_ERR_INPUT);
2844
            }
2845
            c1 = c1 & 0x3F;
2846
            c2 = c2 & 0x3F;
2847
      d = d & 0x0F;
2848
      d = xlattable [48 + c2 + xlattable [48 + c1 +
2849
      xlattable [32 + d] * 64] * 64];
2850
            if (d == 0) {
2851
                /* not in character set */
2852
                *outlen = out - outstart;
2853
                *inlen = processed - instart;
2854
                return(XML_ENC_ERR_INPUT);
2855
            }
2856
            *out++ = d;
2857
        } else {
2858
            /* cannot transcode >= U+010000 */
2859
            *outlen = out - outstart;
2860
            *inlen = processed - instart;
2861
            return(XML_ENC_ERR_INPUT);
2862
        }
2863
        processed = in;
2864
    }
2865
    *outlen = out - outstart;
2866
    *inlen = processed - instart;
2867
    return(*outlen);
2868
}
2869
2870
/**
2871
 * ISO8859xToUTF8
2872
 * @out:  a pointer to an array of bytes to store the result
2873
 * @outlen:  the length of @out
2874
 * @in:  a pointer to an array of ISO Latin 1 chars
2875
 * @inlen:  the length of @in
2876
 *
2877
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
2878
 * block of chars out.
2879
 *
2880
 * Returns the number of bytes written or an XML_ENC_ERR code.
2881
 *
2882
 * The value of @inlen after return is the number of octets consumed
2883
 * The value of @outlen after return is the number of octets produced.
2884
 */
2885
static int
2886
ISO8859xToUTF8(unsigned char* out, int *outlen,
2887
              const unsigned char* in, int *inlen,
2888
              unsigned short const *unicodetable) {
2889
    unsigned char* outstart = out;
2890
    unsigned char* outend;
2891
    const unsigned char* instart = in;
2892
    const unsigned char* inend;
2893
    const unsigned char* instop;
2894
    unsigned int c;
2895
2896
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
2897
        (in == NULL) || (unicodetable == NULL))
2898
  return(XML_ENC_ERR_INTERNAL);
2899
    outend = out + *outlen;
2900
    inend = in + *inlen;
2901
    instop = inend;
2902
2903
    while ((in < inend) && (out < outend - 2)) {
2904
        if (*in >= 0x80) {
2905
            c = unicodetable [*in - 0x80];
2906
            if (c == 0) {
2907
                /* undefined code point */
2908
                *outlen = out - outstart;
2909
                *inlen = in - instart;
2910
                return(XML_ENC_ERR_INPUT);
2911
            }
2912
            if (c < 0x800) {
2913
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
2914
                *out++ = (c & 0x3F) | 0x80;
2915
            } else {
2916
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
2917
                *out++ = ((c >>  6) & 0x3F) | 0x80;
2918
                *out++ = (c & 0x3F) | 0x80;
2919
            }
2920
            ++in;
2921
        }
2922
        if (instop - in > outend - out) instop = in + (outend - out);
2923
        while ((*in < 0x80) && (in < instop)) {
2924
            *out++ = *in++;
2925
        }
2926
    }
2927
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
2928
        *out++ =  *in++;
2929
    }
2930
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
2931
        *out++ =  *in++;
2932
    }
2933
    *outlen = out - outstart;
2934
    *inlen = in - instart;
2935
    return (*outlen);
2936
}
2937
2938
2939
/************************************************************************
2940
 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
2941
 ************************************************************************/
2942
2943
/*
 * ISO-8859-2 to Unicode: entry i holds the Unicode code point for byte
 * 0x80 + i. ISO8859xToUTF8() indexes a table of this shape with
 * (byte - 0x80) and treats a 0 entry as an undefined code point.
 */
static const unsigned short xmlunicodetable_ISO8859_2 [128] = {
2944
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2945
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2946
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2947
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2948
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
2949
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
2950
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
2951
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
2952
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
2953
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
2954
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
2955
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
2956
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
2957
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
2958
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
2959
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
2960
};
2961
2962
/*
 * Multi-level lookup table for converting to ISO-8859-2, stored as a byte
 * string: 48 index bytes followed by 6 blocks of 64 bytes (see the
 * declared size). The UTF-8 decoding code in this file reads such a table
 * as xlattable[48 + c + 64 * block], where "block" comes from an earlier
 * lookup, and treats a final value of 0 as "not in character set".
 * NOTE(review): presumably passed as the xlattable argument of the
 * UTF-8 -> ISO-8859-x converter -- confirm at the call site.
 */
static const unsigned char xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
2963
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
2964
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2965
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2966
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2967
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2968
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2969
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2970
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2971
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2972
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
2973
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
2974
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
2975
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
2976
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2977
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
2978
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
2979
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
2980
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2981
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2982
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
2983
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
2984
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
2985
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
2986
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
2987
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
2988
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
2989
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
2990
};
2991
2992
/*
 * ISO-8859-3 to Unicode: entry i holds the Unicode code point for byte
 * 0x80 + i. ISO8859xToUTF8() indexes a table of this shape with
 * (byte - 0x80) and treats a 0 entry (several are present here) as an
 * undefined code point.
 */
static const unsigned short xmlunicodetable_ISO8859_3 [128] = {
2993
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2994
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2995
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2996
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2997
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
2998
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
2999
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
3000
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
3001
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
3002
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3003
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
3004
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
3005
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
3006
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3007
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
3008
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
3009
};
3010
3011
/*
 * Multi-level lookup table for converting to ISO-8859-3, stored as a byte
 * string: 48 index bytes followed by 7 blocks of 64 bytes (see the
 * declared size). The UTF-8 decoding code in this file reads such a table
 * as xlattable[48 + c + 64 * block], where "block" comes from an earlier
 * lookup, and treats a final value of 0 as "not in character set".
 * NOTE(review): presumably passed as the xlattable argument of the
 * UTF-8 -> ISO-8859-x converter -- confirm at the call site.
 */
static const unsigned char xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
3012
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3013
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3014
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3015
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3016
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3017
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3018
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3019
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3020
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3021
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3022
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
3023
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
3024
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
3025
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
3026
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3027
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3028
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
3029
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3030
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3031
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3032
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3033
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3034
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3035
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3036
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
3037
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
3038
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
3039
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3040
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3041
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3042
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
3043
};
3044
3045
/*
 * ISO-8859-4 to Unicode: entry i holds the Unicode code point for byte
 * 0x80 + i. ISO8859xToUTF8() indexes a table of this shape with
 * (byte - 0x80) and treats a 0 entry as an undefined code point.
 */
static const unsigned short xmlunicodetable_ISO8859_4 [128] = {
3046
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3047
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3048
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3049
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3050
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
3051
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
3052
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
3053
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
3054
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3055
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
3056
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3057
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
3058
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3059
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
3060
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3061
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
3062
};
3063
3064
/*
 * Multi-level lookup table for converting to ISO-8859-4, stored as a byte
 * string: 48 index bytes followed by 6 blocks of 64 bytes (see the
 * declared size). The UTF-8 decoding code in this file reads such a table
 * as xlattable[48 + c + 64 * block], where "block" comes from an earlier
 * lookup, and treats a final value of 0 as "not in character set".
 * NOTE(review): presumably passed as the xlattable argument of the
 * UTF-8 -> ISO-8859-x converter -- confirm at the call site.
 */
static const unsigned char xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
3065
    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
3066
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3067
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3068
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3069
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3070
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3071
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3072
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3073
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3074
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
3075
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3076
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3077
    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3078
    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
3079
    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
3080
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
3081
    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
3082
    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
3083
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
3084
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3085
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
3086
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3087
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3088
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3089
    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
3090
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
3091
    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
3092
};
3093
3094
/*
 * ISO-8859-5 to Unicode: entry i holds the Unicode code point for byte
 * 0x80 + i. ISO8859xToUTF8() indexes a table of this shape with
 * (byte - 0x80) and treats a 0 entry as an undefined code point.
 */
static const unsigned short xmlunicodetable_ISO8859_5 [128] = {
3095
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3096
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3097
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3098
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3099
    0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
3100
    0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
3101
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
3102
    0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
3103
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
3104
    0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
3105
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
3106
    0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
3107
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
3108
    0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
3109
    0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
3110
    0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
3111
};
3112
3113
/*
 * Multi-level lookup table for converting to ISO-8859-5, stored as a byte
 * string: 48 index bytes followed by 6 blocks of 64 bytes (see the
 * declared size). The UTF-8 decoding code in this file reads such a table
 * as xlattable[48 + c + 64 * block], where "block" comes from an earlier
 * lookup, and treats a final value of 0 as "not in character set".
 * NOTE(review): presumably passed as the xlattable argument of the
 * UTF-8 -> ISO-8859-x converter -- confirm at the call site.
 */
static const unsigned char xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
3114
    "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3115
    "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3116
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3117
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3118
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3119
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3120
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3121
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3122
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3123
    "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
3124
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3125
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
3126
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3127
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3128
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3129
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3130
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
3131
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3132
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3133
    "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3134
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3135
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3136
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3137
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3138
    "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3139
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3140
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3141
};
3142
3143
/*
 * ISO-8859-6 to Unicode: entry i holds the Unicode code point for byte
 * 0x80 + i. ISO8859xToUTF8() indexes a table of this shape with
 * (byte - 0x80) and treats a 0 entry (many are present here) as an
 * undefined code point.
 */
static const unsigned short xmlunicodetable_ISO8859_6 [128] = {
3144
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3145
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3146
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3147
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3148
    0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
3149
    0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
3150
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3151
    0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
3152
    0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
3153
    0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3154
    0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3155
    0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3156
    0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3157
    0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3158
    0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3159
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3160
};
3161
3162
/*
 * Multi-level lookup table for converting to ISO-8859-6, stored as a byte
 * string: 48 index bytes followed by 5 blocks of 64 bytes (see the
 * declared size). The UTF-8 decoding code in this file reads such a table
 * as xlattable[48 + c + 64 * block], where "block" comes from an earlier
 * lookup, and treats a final value of 0 as "not in character set".
 * NOTE(review): presumably passed as the xlattable argument of the
 * UTF-8 -> ISO-8859-x converter -- confirm at the call site.
 */
static const unsigned char xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
3163
    "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3164
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3165
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3166
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3167
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3168
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3169
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3170
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3171
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3172
    "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3173
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3174
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3175
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3176
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3177
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3178
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3179
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3180
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3181
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3182
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3183
    "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3184
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3185
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3186
};
3187
3188
/*
 * ISO-8859-7 to Unicode: entry i holds the Unicode code point for byte
 * 0x80 + i. ISO8859xToUTF8() indexes a table of this shape with
 * (byte - 0x80) and treats a 0 entry (several are present here) as an
 * undefined code point.
 */
static const unsigned short xmlunicodetable_ISO8859_7 [128] = {
3189
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3190
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3191
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3192
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3193
    0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3194
    0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3195
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3196
    0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3197
    0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3198
    0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3199
    0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3200
    0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3201
    0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3202
    0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3203
    0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3204
    0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3205
};
3206
3207
/*
 * Multi-level lookup table for converting to ISO-8859-7, stored as a byte
 * string: 48 index bytes followed by 7 blocks of 64 bytes (see the
 * declared size). The UTF-8 decoding code in this file reads such a table
 * as xlattable[48 + c + 64 * block], where "block" comes from an earlier
 * lookup, and treats a final value of 0 as "not in character set".
 * NOTE(review): presumably passed as the xlattable argument of the
 * UTF-8 -> ISO-8859-x converter -- confirm at the call site.
 */
static const unsigned char xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3208
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3209
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3210
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3211
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3212
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3213
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3214
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3215
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3216
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3217
    "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3218
    "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3219
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3220
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3221
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3222
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3223
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3224
    "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3225
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3226
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3227
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3228
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3229
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3230
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3231
    "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3232
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3233
    "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3234
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3235
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3236
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3237
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3238
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3239
};
3240
3241
/*
 * ISO-8859-8 to Unicode: entry i holds the Unicode code point for byte
 * 0x80 + i. ISO8859xToUTF8() indexes a table of this shape with
 * (byte - 0x80) and treats a 0 entry (many are present here) as an
 * undefined code point.
 */
static const unsigned short xmlunicodetable_ISO8859_8 [128] = {
3242
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3243
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3244
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3245
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3246
    0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3247
    0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3248
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3249
    0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3250
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3251
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3252
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3253
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3254
    0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3255
    0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3256
    0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3257
    0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3258
};
3259
3260
/*
 * Multi-level lookup table for converting to ISO-8859-8, stored as a byte
 * string: 48 index bytes followed by 7 blocks of 64 bytes (see the
 * declared size). The UTF-8 decoding code in this file reads such a table
 * as xlattable[48 + c + 64 * block], where "block" comes from an earlier
 * lookup, and treats a final value of 0 as "not in character set".
 * NOTE(review): presumably passed as the xlattable argument of the
 * UTF-8 -> ISO-8859-x converter -- confirm at the call site.
 */
static const unsigned char xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3261
    "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3262
    "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3263
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3264
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3265
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3266
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3267
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3268
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3269
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3270
    "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3271
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3272
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3273
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3274
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3275
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3276
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3277
    "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3278
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3279
    "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3280
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3281
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3282
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3283
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3284
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3285
    "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3286
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3287
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3288
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3289
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3290
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3291
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3292
};
3293
3294
/*
 * ISO-8859-9 to Unicode: entry i holds the Unicode code point for byte
 * 0x80 + i. ISO8859xToUTF8() indexes a table of this shape with
 * (byte - 0x80) and treats a 0 entry as an undefined code point.
 */
static const unsigned short xmlunicodetable_ISO8859_9 [128] = {
3295
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3296
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3297
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3298
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3299
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3300
    0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3301
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3302
    0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3303
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3304
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3305
    0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3306
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3307
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3308
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3309
    0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3310
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3311
};
3312
3313
/*
 * Multi-level lookup table for converting to ISO-8859-9, stored as a byte
 * string: 48 index bytes followed by 5 blocks of 64 bytes (see the
 * declared size). The UTF-8 decoding code in this file reads such a table
 * as xlattable[48 + c + 64 * block], where "block" comes from an earlier
 * lookup, and treats a final value of 0 as "not in character set".
 * NOTE(review): presumably passed as the xlattable argument of the
 * UTF-8 -> ISO-8859-x converter -- confirm at the call site.
 */
static const unsigned char xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3314
    "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3315
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3316
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3317
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3318
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3319
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3320
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3321
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3322
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3323
    "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3324
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3325
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3326
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3327
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3328
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3329
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3330
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3331
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3332
    "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3333
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3334
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3335
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3336
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3337
};
3338
3339
/*
 * ISO-8859-10 to Unicode: entry i holds the Unicode code point for byte
 * 0x80 + i. ISO8859xToUTF8() indexes a table of this shape with
 * (byte - 0x80) and treats a 0 entry as an undefined code point.
 */
static const unsigned short xmlunicodetable_ISO8859_10 [128] = {
3340
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3341
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3342
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3343
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3344
    0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3345
    0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3346
    0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3347
    0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3348
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3349
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3350
    0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3351
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3352
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3353
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3354
    0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3355
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3356
};
3357
3358
/*
 * xmltranscodetable_ISO8859_10:
 *
 * Byte table handed to UTF8ToISO8859x by UTF8ToISO8859_10.
 * The array holds 48 + 7 * 64 = 496 bytes and the string literal below
 * supplies exactly 496 (31 rows of 16), so no terminating NUL is stored.
 * NOTE(review): the size expression suggests a 48-byte index area followed
 * by seven 64-byte blocks of ISO-8859-10 byte values, with 0x00 presumably
 * meaning "no mapping"; the lookup is implemented in UTF8ToISO8859x, which
 * is not visible here -- verify against it.
 */
static const unsigned char xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3359
    "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3360
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3361
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3362
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3363
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3364
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3365
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3366
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3367
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3368
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3369
    "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3370
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3371
    "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3372
    "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3373
    "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3374
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3375
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3376
    "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3377
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3378
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3379
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3380
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3381
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3382
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3383
    "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3384
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3385
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3386
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3387
    "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3388
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3389
    "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3390
};
3391
3392
/*
 * xmlunicodetable_ISO8859_11:
 *
 * 128 16-bit values handed to ISO8859xToUTF8 by ISO8859_11ToUTF8.
 * The first 32 entries are 0x0080..0x009f in ascending order, so entry i
 * presumably holds the Unicode code point for ISO-8859-11 byte 0x80 + i.
 * Eight entries are 0x0000; NOTE(review): these look like bytes with no
 * assigned character -- confirm how ISO8859xToUTF8 (not part of this
 * excerpt) treats a zero entry.
 */
static const unsigned short xmlunicodetable_ISO8859_11 [128] = {
3393
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3394
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3395
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3396
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3397
    0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3398
    0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3399
    0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3400
    0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3401
    0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3402
    0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3403
    0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3404
    0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3405
    0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3406
    0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3407
    0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3408
    0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3409
};
3410
3411
/*
 * xmltranscodetable_ISO8859_11:
 *
 * Byte table handed to UTF8ToISO8859x by UTF8ToISO8859_11.
 * The array holds 48 + 6 * 64 = 432 bytes and the string literal below
 * supplies exactly 432 (27 rows of 16), so no terminating NUL is stored.
 * NOTE(review): the size expression suggests a 48-byte index area followed
 * by six 64-byte blocks of ISO-8859-11 byte values, with 0x00 presumably
 * meaning "no mapping"; the lookup is implemented in UTF8ToISO8859x, which
 * is not visible here -- verify against it.
 */
static const unsigned char xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3412
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3413
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3414
    "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3415
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3416
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3417
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3418
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3419
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3420
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3421
    "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3422
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3423
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3424
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3425
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3426
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3427
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3428
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3429
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3430
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3431
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3432
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3433
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3434
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3435
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3436
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3437
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3438
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3439
};
3440
3441
/*
 * xmlunicodetable_ISO8859_13:
 *
 * 128 16-bit values handed to ISO8859xToUTF8 by ISO8859_13ToUTF8.
 * The first 32 entries are 0x0080..0x009f in ascending order, so entry i
 * presumably holds the Unicode code point for ISO-8859-13 byte 0x80 + i.
 * NOTE(review): the lookup itself lives in ISO8859xToUTF8, which is not
 * part of this excerpt -- confirm the indexing there.
 */
static const unsigned short xmlunicodetable_ISO8859_13 [128] = {
3442
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3443
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3444
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3445
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3446
    0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3447
    0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3448
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3449
    0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3450
    0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3451
    0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3452
    0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3453
    0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3454
    0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3455
    0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3456
    0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3457
    0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3458
};
3459
3460
/*
 * xmltranscodetable_ISO8859_13:
 *
 * Byte table handed to UTF8ToISO8859x by UTF8ToISO8859_13.
 * The array holds 48 + 7 * 64 = 496 bytes and the string literal below
 * supplies exactly 496 (31 rows of 16), so no terminating NUL is stored.
 * NOTE(review): the size expression suggests a 48-byte index area followed
 * by seven 64-byte blocks of ISO-8859-13 byte values, with 0x00 presumably
 * meaning "no mapping"; the lookup is implemented in UTF8ToISO8859x, which
 * is not visible here -- verify against it.
 */
static const unsigned char xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3461
    "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3462
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3463
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3464
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3465
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3466
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3467
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3468
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3469
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3470
    "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3471
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3472
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3473
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3474
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3475
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3476
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3477
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3478
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3479
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3480
    "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3481
    "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3482
    "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3483
    "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3484
    "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3485
    "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3486
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3487
    "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3488
    "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3489
    "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3490
    "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3491
    "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3492
};
3493
3494
/*
 * xmlunicodetable_ISO8859_14:
 *
 * 128 16-bit values handed to ISO8859xToUTF8 by ISO8859_14ToUTF8.
 * The first 32 entries are 0x0080..0x009f in ascending order, so entry i
 * presumably holds the Unicode code point for ISO-8859-14 byte 0x80 + i.
 * NOTE(review): the lookup itself lives in ISO8859xToUTF8, which is not
 * part of this excerpt -- confirm the indexing there.
 */
static const unsigned short xmlunicodetable_ISO8859_14 [128] = {
3495
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3496
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3497
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3498
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3499
    0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3500
    0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3501
    0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3502
    0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3503
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3504
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3505
    0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3506
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3507
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3508
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3509
    0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3510
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
3511
};
3512
3513
/*
 * xmltranscodetable_ISO8859_14:
 *
 * Byte table handed to UTF8ToISO8859x by UTF8ToISO8859_14.
 * The array holds 48 + 10 * 64 = 688 bytes and the string literal below
 * supplies exactly 688 (43 rows of 16), so no terminating NUL is stored.
 * NOTE(review): the size expression suggests a 48-byte index area followed
 * by ten 64-byte blocks of ISO-8859-14 byte values, with 0x00 presumably
 * meaning "no mapping"; the lookup is implemented in UTF8ToISO8859x, which
 * is not visible here -- verify against it.
 */
static const unsigned char xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
3514
    "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3515
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3516
    "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3517
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3518
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3519
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3520
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3521
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3522
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3523
    "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3524
    "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3525
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3526
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3527
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3528
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3529
    "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3530
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3531
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3532
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3533
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3534
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3535
    "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3536
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3537
    "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3538
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3539
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3540
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3541
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3542
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3543
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3544
    "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3545
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3546
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3547
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3548
    "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3549
    "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3550
    "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3551
    "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3552
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3553
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3554
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3555
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3556
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3557
};
3558
3559
/*
 * xmlunicodetable_ISO8859_15:
 *
 * 128 16-bit values handed to ISO8859xToUTF8 by ISO8859_15ToUTF8.
 * The first 32 entries are 0x0080..0x009f in ascending order, so entry i
 * presumably holds the Unicode code point for ISO-8859-15 byte 0x80 + i.
 * Only eight entries differ from 0x0080 + i (e.g. 0x20ac at index 0x24).
 * NOTE(review): the lookup itself lives in ISO8859xToUTF8, which is not
 * part of this excerpt -- confirm the indexing there.
 */
static const unsigned short xmlunicodetable_ISO8859_15 [128] = {
3560
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3561
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3562
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3563
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3564
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3565
    0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3566
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3567
    0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3568
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3569
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3570
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3571
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3572
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3573
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3574
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3575
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
3576
};
3577
3578
/*
 * xmltranscodetable_ISO8859_15:
 *
 * Byte table handed to UTF8ToISO8859x by UTF8ToISO8859_15.
 * The array holds 48 + 6 * 64 = 432 bytes and the string literal below
 * supplies exactly 432 (27 rows of 16), so no terminating NUL is stored.
 * NOTE(review): the size expression suggests a 48-byte index area followed
 * by six 64-byte blocks of ISO-8859-15 byte values, with 0x00 presumably
 * meaning "no mapping"; the lookup is implemented in UTF8ToISO8859x, which
 * is not visible here -- verify against it.
 */
static const unsigned char xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
3579
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3580
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3581
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3582
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3583
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3584
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3585
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3586
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3587
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3588
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3589
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3590
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3591
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3592
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3593
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3594
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3595
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3596
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3597
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3598
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3599
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3600
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3601
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3602
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3603
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3604
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3605
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
3606
};
3607
3608
/*
 * xmlunicodetable_ISO8859_16:
 *
 * 128 16-bit values handed to ISO8859xToUTF8 by ISO8859_16ToUTF8.
 * The first 32 entries are 0x0080..0x009f in ascending order, so entry i
 * presumably holds the Unicode code point for ISO-8859-16 byte 0x80 + i.
 * NOTE(review): the lookup itself lives in ISO8859xToUTF8, which is not
 * part of this excerpt -- confirm the indexing there.
 */
static const unsigned short xmlunicodetable_ISO8859_16 [128] = {
3609
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3610
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3611
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3612
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3613
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3614
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3615
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3616
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3617
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3618
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3619
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3620
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3621
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3622
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3623
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3624
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
3625
};
3626
3627
/*
 * xmltranscodetable_ISO8859_16:
 *
 * Byte table handed to UTF8ToISO8859x by UTF8ToISO8859_16.
 * The array holds 48 + 9 * 64 = 624 bytes and the string literal below
 * supplies exactly 624 (39 rows of 16), so no terminating NUL is stored.
 * NOTE(review): the size expression suggests a 48-byte index area followed
 * by nine 64-byte blocks of ISO-8859-16 byte values, with 0x00 presumably
 * meaning "no mapping"; the lookup is implemented in UTF8ToISO8859x, which
 * is not visible here -- verify against it.
 */
static const unsigned char xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
3628
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3629
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3630
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3631
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3632
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3633
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3634
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3635
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3636
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3637
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3638
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3639
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3640
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3641
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3642
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3643
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3644
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3645
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3646
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3647
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3648
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3649
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3650
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3651
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3652
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3653
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3654
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3655
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3656
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3657
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3658
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3659
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3660
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3661
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3662
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3663
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3664
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3665
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3666
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3667
};
3668
3669
3670
/*
3671
 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3672
 */
3673
3674
static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3675
    const unsigned char* in, int *inlen) {
3676
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3677
}
3678
static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3679
    const unsigned char* in, int *inlen) {
3680
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3681
}
3682
3683
static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3684
    const unsigned char* in, int *inlen) {
3685
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3686
}
3687
static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3688
    const unsigned char* in, int *inlen) {
3689
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3690
}
3691
3692
static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3693
    const unsigned char* in, int *inlen) {
3694
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3695
}
3696
static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3697
    const unsigned char* in, int *inlen) {
3698
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3699
}
3700
3701
static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3702
    const unsigned char* in, int *inlen) {
3703
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3704
}
3705
static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3706
    const unsigned char* in, int *inlen) {
3707
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3708
}
3709
3710
static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3711
    const unsigned char* in, int *inlen) {
3712
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3713
}
3714
static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3715
    const unsigned char* in, int *inlen) {
3716
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3717
}
3718
3719
static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3720
    const unsigned char* in, int *inlen) {
3721
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3722
}
3723
static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3724
    const unsigned char* in, int *inlen) {
3725
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3726
}
3727
3728
static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3729
    const unsigned char* in, int *inlen) {
3730
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3731
}
3732
static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3733
    const unsigned char* in, int *inlen) {
3734
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3735
}
3736
3737
static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3738
    const unsigned char* in, int *inlen) {
3739
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3740
}
3741
static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3742
    const unsigned char* in, int *inlen) {
3743
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3744
}
3745
3746
static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3747
    const unsigned char* in, int *inlen) {
3748
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3749
}
3750
static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3751
    const unsigned char* in, int *inlen) {
3752
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3753
}
3754
3755
static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3756
    const unsigned char* in, int *inlen) {
3757
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3758
}
3759
static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3760
    const unsigned char* in, int *inlen) {
3761
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3762
}
3763
3764
static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3765
    const unsigned char* in, int *inlen) {
3766
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3767
}
3768
static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3769
    const unsigned char* in, int *inlen) {
3770
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3771
}
3772
3773
static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3774
    const unsigned char* in, int *inlen) {
3775
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3776
}
3777
static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3778
    const unsigned char* in, int *inlen) {
3779
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3780
}
3781
3782
static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3783
    const unsigned char* in, int *inlen) {
3784
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3785
}
3786
static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3787
    const unsigned char* in, int *inlen) {
3788
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3789
}
3790
3791
static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3792
    const unsigned char* in, int *inlen) {
3793
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3794
}
3795
static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3796
    const unsigned char* in, int *inlen) {
3797
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3798
}
3799
3800
#endif
3801
#endif
3802