Coverage Report

Created: 2025-07-23 08:13

/src/fontconfig/subprojects/libxml2-2.12.6/encoding.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * encoding.c : implements the encoding conversion functions needed for XML
3
 *
4
 * Related specs:
5
 * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6
 * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7
 * [ISO-10646]    UTF-8 and UTF-16 in Annexes
8
 * [ISO-8859-1]   ISO Latin-1 characters codes.
9
 * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
10
 *                Worldwide Character Encoding -- Version 1.0", Addison-
11
 *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
12
 *                described in Unicode Technical Report #4.
13
 * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
14
 *                Information Interchange, ANSI X3.4-1986.
15
 *
16
 * See Copyright for the status of this software.
17
 *
18
 * daniel@veillard.com
19
 *
20
 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
21
 */
22
23
#define IN_LIBXML
24
#include "libxml.h"
25
26
#include <string.h>
27
#include <limits.h>
28
#include <ctype.h>
29
#include <stdlib.h>
30
31
#ifdef LIBXML_ICONV_ENABLED
32
#include <errno.h>
33
#endif
34
35
#include <libxml/encoding.h>
36
#include <libxml/xmlmemory.h>
37
#include <libxml/parser.h>
38
#ifdef LIBXML_HTML_ENABLED
39
#include <libxml/HTMLparser.h>
40
#endif
41
#include <libxml/xmlerror.h>
42
43
#include "private/buf.h"
44
#include "private/enc.h"
45
#include "private/error.h"
46
47
#ifdef LIBXML_ICU_ENABLED
48
#include <unicode/ucnv.h>
49
/* Size of pivot buffer, same as icu/source/common/ucnv.cpp CHUNK_SIZE */
50
#define ICU_PIVOT_BUF_SIZE 1024
51
/*
 * State kept for one ICU-backed conversion handle: two ICU converters
 * and a UChar pivot buffer (presumably the UTF-16 intermediate used
 * when chaining the two converters -- confirm against the callers).
 */
typedef struct _uconv_t uconv_t;
52
struct _uconv_t {
53
  UConverter *uconv; /* for conversion between an encoding and UTF-16 */
54
  UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */
55
  UChar      pivot_buf[ICU_PIVOT_BUF_SIZE]; /* pivot storage, ICU_PIVOT_BUF_SIZE UChars */
56
  UChar      *pivot_source; /* set to pivot_buf by openIcuConverter() */
57
  UChar      *pivot_target; /* set to pivot_buf by openIcuConverter() */
58
};
59
#endif
60
61
/*
 * One entry of the encoding alias table: maps an alias string to the
 * encoding name it stands for. Both strings are released with xmlFree()
 * by xmlCleanupEncodingAliases().
 */
typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
62
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
63
struct _xmlCharEncodingAlias {
64
    const char *name;   /* encoding name returned by xmlGetEncodingAlias() */
65
    const char *alias;  /* alias compared against the upper-cased lookup key */
66
};
67
68
/* Alias table; NULL until an alias is registered and again after cleanup. */
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
69
/* Number of entries currently used in xmlCharEncodingAliases. */
static int xmlCharEncodingAliasesNb = 0;
70
/* Number of entries allocated; compared with ...Nb to decide when to grow. */
static int xmlCharEncodingAliasesMax = 0;
71
72
/*
 * Non-zero when 16-bit values may be accessed natively as little-endian;
 * the UTF-16 converters test it to choose between a native unsigned short
 * access and a byte-by-byte one. Defaults to 1 here.
 * NOTE(review): presumably recomputed during library initialisation,
 * which is outside this view -- confirm.
 */
static int xmlLittleEndian = 1;
73
74
#ifdef LIBXML_ICU_ENABLED
75
static uconv_t*
76
openIcuConverter(const char* name, int toUnicode)
77
{
78
  UErrorCode status = U_ZERO_ERROR;
79
  uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
80
  if (conv == NULL)
81
    return NULL;
82
83
  conv->pivot_source = conv->pivot_buf;
84
  conv->pivot_target = conv->pivot_buf;
85
86
  conv->uconv = ucnv_open(name, &status);
87
  if (U_FAILURE(status))
88
    goto error;
89
90
  status = U_ZERO_ERROR;
91
  if (toUnicode) {
92
    ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
93
                        NULL, NULL, NULL, &status);
94
  }
95
  else {
96
    ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
97
                        NULL, NULL, NULL, &status);
98
  }
99
  if (U_FAILURE(status))
100
    goto error;
101
102
  status = U_ZERO_ERROR;
103
  conv->utf8 = ucnv_open("UTF-8", &status);
104
  if (U_SUCCESS(status))
105
    return conv;
106
107
error:
108
  if (conv->uconv)
109
    ucnv_close(conv->uconv);
110
  xmlFree(conv);
111
  return NULL;
112
}
113
114
static void
115
closeIcuConverter(uconv_t *conv)
116
{
117
  if (conv != NULL) {
118
    ucnv_close(conv->uconv);
119
    ucnv_close(conv->utf8);
120
    xmlFree(conv);
121
  }
122
}
123
#endif /* LIBXML_ICU_ENABLED */
124
125
/************************************************************************
126
 *                  *
127
 *    Conversions To/From UTF8 encoding     *
128
 *                  *
129
 ************************************************************************/
130
131
/**
132
 * asciiToUTF8:
133
 * @out:  a pointer to an array of bytes to store the result
134
 * @outlen:  the length of @out
135
 * @in:  a pointer to an array of ASCII chars
136
 * @inlen:  the length of @in
137
 *
138
 * Take a block of ASCII chars in and try to convert it to an UTF-8
139
 * block of chars out.
140
 *
141
 * Returns the number of bytes written or an XML_ENC_ERR code.
142
 *
143
 * The value of @inlen after return is the number of octets consumed
144
 *     if the return value is positive, else unpredictable.
145
 * The value of @outlen after return is the number of octets produced.
146
 */
147
static int
148
asciiToUTF8(unsigned char* out, int *outlen,
149
0
              const unsigned char* in, int *inlen) {
150
0
    unsigned char* outstart = out;
151
0
    const unsigned char* base = in;
152
0
    const unsigned char* processed = in;
153
0
    unsigned char* outend = out + *outlen;
154
0
    const unsigned char* inend;
155
0
    unsigned int c;
156
157
0
    inend = in + (*inlen);
158
0
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
159
0
  c= *in++;
160
161
0
        if (out >= outend)
162
0
      break;
163
0
        if (c < 0x80) {
164
0
      *out++ = c;
165
0
  } else {
166
0
      *outlen = out - outstart;
167
0
      *inlen = processed - base;
168
0
      return(XML_ENC_ERR_INPUT);
169
0
  }
170
171
0
  processed = (const unsigned char*) in;
172
0
    }
173
0
    *outlen = out - outstart;
174
0
    *inlen = processed - base;
175
0
    return(*outlen);
176
0
}
177
178
#ifdef LIBXML_OUTPUT_ENABLED
179
/**
180
 * UTF8Toascii:
181
 * @out:  a pointer to an array of bytes to store the result
182
 * @outlen:  the length of @out
183
 * @in:  a pointer to an array of UTF-8 chars
184
 * @inlen:  the length of @in
185
 *
186
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
187
 * block of chars out.
188
 *
189
 * Returns the number of bytes written or an XML_ENC_ERR code.
190
 *
191
 * The value of @inlen after return is the number of octets consumed
192
 *     if the return value is positive, else unpredictable.
193
 * The value of @outlen after return is the number of octets produced.
194
 */
195
static int
196
UTF8Toascii(unsigned char* out, int *outlen,
197
0
              const unsigned char* in, int *inlen) {
198
0
    const unsigned char* processed = in;
199
0
    const unsigned char* outend;
200
0
    const unsigned char* outstart = out;
201
0
    const unsigned char* instart = in;
202
0
    const unsigned char* inend;
203
0
    unsigned int c, d;
204
0
    int trailing;
205
206
0
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
207
0
        return(XML_ENC_ERR_INTERNAL);
208
0
    if (in == NULL) {
209
        /*
210
   * initialization nothing to do
211
   */
212
0
  *outlen = 0;
213
0
  *inlen = 0;
214
0
  return(0);
215
0
    }
216
0
    inend = in + (*inlen);
217
0
    outend = out + (*outlen);
218
0
    while (in < inend) {
219
0
  d = *in++;
220
0
  if      (d < 0x80)  { c= d; trailing= 0; }
221
0
  else if (d < 0xC0) {
222
      /* trailing byte in leading position */
223
0
      *outlen = out - outstart;
224
0
      *inlen = processed - instart;
225
0
      return(XML_ENC_ERR_INPUT);
226
0
        } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
227
0
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
228
0
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
229
0
  else {
230
      /* no chance for this in Ascii */
231
0
      *outlen = out - outstart;
232
0
      *inlen = processed - instart;
233
0
      return(XML_ENC_ERR_INPUT);
234
0
  }
235
236
0
  if (inend - in < trailing) {
237
0
      break;
238
0
  }
239
240
0
  for ( ; trailing; trailing--) {
241
0
      if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
242
0
    break;
243
0
      c <<= 6;
244
0
      c |= d & 0x3F;
245
0
  }
246
247
  /* assertion: c is a single UTF-4 value */
248
0
  if (c < 0x80) {
249
0
      if (out >= outend)
250
0
    break;
251
0
      *out++ = c;
252
0
  } else {
253
      /* no chance for this in Ascii */
254
0
      *outlen = out - outstart;
255
0
      *inlen = processed - instart;
256
0
      return(XML_ENC_ERR_INPUT);
257
0
  }
258
0
  processed = in;
259
0
    }
260
0
    *outlen = out - outstart;
261
0
    *inlen = processed - instart;
262
0
    return(*outlen);
263
0
}
264
#endif /* LIBXML_OUTPUT_ENABLED */
265
266
/**
267
 * isolat1ToUTF8:
268
 * @out:  a pointer to an array of bytes to store the result
269
 * @outlen:  the length of @out
270
 * @in:  a pointer to an array of ISO Latin 1 chars
271
 * @inlen:  the length of @in
272
 *
273
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
274
 * block of chars out.
275
 *
276
 * Returns the number of bytes written or an XML_ENC_ERR code.
277
 *
278
 * The value of @inlen after return is the number of octets consumed
279
 *     if the return value is positive, else unpredictable.
280
 * The value of @outlen after return is the number of octets produced.
281
 */
282
int
283
isolat1ToUTF8(unsigned char* out, int *outlen,
284
0
              const unsigned char* in, int *inlen) {
285
0
    unsigned char* outstart = out;
286
0
    const unsigned char* base = in;
287
0
    unsigned char* outend;
288
0
    const unsigned char* inend;
289
0
    const unsigned char* instop;
290
291
0
    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
292
0
  return(XML_ENC_ERR_INTERNAL);
293
294
0
    outend = out + *outlen;
295
0
    inend = in + (*inlen);
296
0
    instop = inend;
297
298
0
    while ((in < inend) && (out < outend - 1)) {
299
0
  if (*in >= 0x80) {
300
0
      *out++ = (((*in) >>  6) & 0x1F) | 0xC0;
301
0
            *out++ = ((*in) & 0x3F) | 0x80;
302
0
      ++in;
303
0
  }
304
0
  if ((instop - in) > (outend - out)) instop = in + (outend - out);
305
0
  while ((in < instop) && (*in < 0x80)) {
306
0
      *out++ = *in++;
307
0
  }
308
0
    }
309
0
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
310
0
        *out++ = *in++;
311
0
    }
312
0
    *outlen = out - outstart;
313
0
    *inlen = in - base;
314
0
    return(*outlen);
315
0
}
316
317
/**
318
 * UTF8ToUTF8:
319
 * @out:  a pointer to an array of bytes to store the result
320
 * @outlen:  the length of @out
321
 * @inb:  a pointer to an array of UTF-8 chars
322
 * @inlenb:  the length of @in in UTF-8 chars
323
 *
324
 * No op copy operation for UTF8 handling.
325
 *
326
 * Returns the number of bytes written or an XML_ENC_ERR code.
327
 *
328
 *     The value of *inlen after return is the number of octets consumed
329
 *     if the return value is positive, else unpredictable.
330
 */
331
static int
332
UTF8ToUTF8(unsigned char* out, int *outlen,
333
           const unsigned char* inb, int *inlenb)
334
0
{
335
0
    int len;
336
337
0
    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
338
0
  return(XML_ENC_ERR_INTERNAL);
339
0
    if (inb == NULL) {
340
        /* inb == NULL means output is initialized. */
341
0
        *outlen = 0;
342
0
        *inlenb = 0;
343
0
        return(0);
344
0
    }
345
0
    if (*outlen > *inlenb) {
346
0
  len = *inlenb;
347
0
    } else {
348
0
  len = *outlen;
349
0
    }
350
0
    if (len < 0)
351
0
  return(XML_ENC_ERR_INTERNAL);
352
353
    /*
354
     * FIXME: Conversion functions must assure valid UTF-8, so we have
355
     * to check for UTF-8 validity. Preferably, this converter shouldn't
356
     * be used at all.
357
     */
358
0
    memcpy(out, inb, len);
359
360
0
    *outlen = len;
361
0
    *inlenb = len;
362
0
    return(*outlen);
363
0
}
364
365
366
#ifdef LIBXML_OUTPUT_ENABLED
367
/**
368
 * UTF8Toisolat1:
369
 * @out:  a pointer to an array of bytes to store the result
370
 * @outlen:  the length of @out
371
 * @in:  a pointer to an array of UTF-8 chars
372
 * @inlen:  the length of @in
373
 *
374
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
375
 * block of chars out.
376
 *
377
 * Returns the number of bytes written or an XML_ENC_ERR code.
378
 *
379
 * The value of @inlen after return is the number of octets consumed
380
 *     if the return value is positive, else unpredictable.
381
 * The value of @outlen after return is the number of octets produced.
382
 */
383
int
384
UTF8Toisolat1(unsigned char* out, int *outlen,
385
0
              const unsigned char* in, int *inlen) {
386
0
    const unsigned char* processed = in;
387
0
    const unsigned char* outend;
388
0
    const unsigned char* outstart = out;
389
0
    const unsigned char* instart = in;
390
0
    const unsigned char* inend;
391
0
    unsigned int c, d;
392
0
    int trailing;
393
394
0
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
395
0
        return(XML_ENC_ERR_INTERNAL);
396
0
    if (in == NULL) {
397
        /*
398
   * initialization nothing to do
399
   */
400
0
  *outlen = 0;
401
0
  *inlen = 0;
402
0
  return(0);
403
0
    }
404
0
    inend = in + (*inlen);
405
0
    outend = out + (*outlen);
406
0
    while (in < inend) {
407
0
  d = *in++;
408
0
  if      (d < 0x80)  { c= d; trailing= 0; }
409
0
  else if (d < 0xC0) {
410
      /* trailing byte in leading position */
411
0
      *outlen = out - outstart;
412
0
      *inlen = processed - instart;
413
0
      return(XML_ENC_ERR_INPUT);
414
0
        } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
415
0
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
416
0
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
417
0
  else {
418
      /* no chance for this in IsoLat1 */
419
0
      *outlen = out - outstart;
420
0
      *inlen = processed - instart;
421
0
      return(XML_ENC_ERR_INPUT);
422
0
  }
423
424
0
  if (inend - in < trailing) {
425
0
      break;
426
0
  }
427
428
0
  for ( ; trailing; trailing--) {
429
0
      if (in >= inend)
430
0
    break;
431
0
      if (((d= *in++) & 0xC0) != 0x80) {
432
0
    *outlen = out - outstart;
433
0
    *inlen = processed - instart;
434
0
    return(XML_ENC_ERR_INPUT);
435
0
      }
436
0
      c <<= 6;
437
0
      c |= d & 0x3F;
438
0
  }
439
440
  /* assertion: c is a single UTF-4 value */
441
0
  if (c <= 0xFF) {
442
0
      if (out >= outend)
443
0
    break;
444
0
      *out++ = c;
445
0
  } else {
446
      /* no chance for this in IsoLat1 */
447
0
      *outlen = out - outstart;
448
0
      *inlen = processed - instart;
449
0
      return(XML_ENC_ERR_INPUT);
450
0
  }
451
0
  processed = in;
452
0
    }
453
0
    *outlen = out - outstart;
454
0
    *inlen = processed - instart;
455
0
    return(*outlen);
456
0
}
457
#endif /* LIBXML_OUTPUT_ENABLED */
458
459
/**
460
 * UTF16LEToUTF8:
461
 * @out:  a pointer to an array of bytes to store the result
462
 * @outlen:  the length of @out
463
 * @inb:  a pointer to an array of UTF-16LE passwd as a byte array
464
 * @inlenb:  the length of @in in UTF-16LE chars
465
 *
466
 * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8
467
 * block of chars out. This function assumes the endian property
468
 * is the same between the native type of this machine and the
469
 * inputed one.
470
 *
471
 * Returns the number of bytes written or an XML_ENC_ERR code.
472
 *
473
 * The value of *inlen after return is the number of octets consumed
474
 * if the return value is positive, else unpredictable.
475
 */
476
static int
477
UTF16LEToUTF8(unsigned char* out, int *outlen,
478
            const unsigned char* inb, int *inlenb)
479
0
{
480
0
    unsigned char* outstart = out;
481
0
    const unsigned char* processed = inb;
482
0
    unsigned char* outend;
483
0
    unsigned short* in = (unsigned short *) (void *) inb;
484
0
    unsigned short* inend;
485
0
    unsigned int c, d, inlen;
486
0
    unsigned char *tmp;
487
0
    int bits;
488
489
0
    if (*outlen == 0) {
490
0
        *inlenb = 0;
491
0
        return(0);
492
0
    }
493
0
    outend = out + *outlen;
494
0
    if ((*inlenb % 2) == 1)
495
0
        (*inlenb)--;
496
0
    inlen = *inlenb / 2;
497
0
    inend = in + inlen;
498
0
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
499
0
        if (xmlLittleEndian) {
500
0
      c= *in++;
501
0
  } else {
502
0
      tmp = (unsigned char *) in;
503
0
      c = *tmp++;
504
0
      c = c | (*tmp << 8);
505
0
      in++;
506
0
  }
507
0
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
508
0
      if (in >= inend) {           /* handle split mutli-byte characters */
509
0
    break;
510
0
      }
511
0
      if (xmlLittleEndian) {
512
0
    d = *in++;
513
0
      } else {
514
0
    tmp = (unsigned char *) in;
515
0
    d = *tmp++;
516
0
    d = d | (*tmp << 8);
517
0
    in++;
518
0
      }
519
0
            if ((d & 0xFC00) == 0xDC00) {
520
0
                c &= 0x03FF;
521
0
                c <<= 10;
522
0
                c |= d & 0x03FF;
523
0
                c += 0x10000;
524
0
            }
525
0
            else {
526
0
    *outlen = out - outstart;
527
0
    *inlenb = processed - inb;
528
0
          return(XML_ENC_ERR_INPUT);
529
0
      }
530
0
        }
531
532
  /* assertion: c is a single UTF-4 value */
533
0
        if (out >= outend)
534
0
      break;
535
0
        if      (c <    0x80) {  *out++=  c;                bits= -6; }
536
0
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
537
0
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
538
0
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
539
540
0
        for ( ; bits >= 0; bits-= 6) {
541
0
            if (out >= outend)
542
0
          break;
543
0
            *out++= ((c >> bits) & 0x3F) | 0x80;
544
0
        }
545
0
  processed = (const unsigned char*) in;
546
0
    }
547
0
    *outlen = out - outstart;
548
0
    *inlenb = processed - inb;
549
0
    return(*outlen);
550
0
}
551
552
#ifdef LIBXML_OUTPUT_ENABLED
553
/**
554
 * UTF8ToUTF16LE:
555
 * @outb:  a pointer to an array of bytes to store the result
556
 * @outlen:  the length of @outb
557
 * @in:  a pointer to an array of UTF-8 chars
558
 * @inlen:  the length of @in
559
 *
560
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
561
 * block of chars out.
562
 *
563
 * Returns the number of bytes written or an XML_ENC_ERR code.
564
 */
565
static int
566
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
567
            const unsigned char* in, int *inlen)
568
0
{
569
0
    unsigned short* out = (unsigned short *) (void *) outb;
570
0
    const unsigned char* processed = in;
571
0
    const unsigned char *const instart = in;
572
0
    unsigned short* outstart= out;
573
0
    unsigned short* outend;
574
0
    const unsigned char* inend;
575
0
    unsigned int c, d;
576
0
    int trailing;
577
0
    unsigned char *tmp;
578
0
    unsigned short tmp1, tmp2;
579
580
    /* UTF16LE encoding has no BOM */
581
0
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
582
0
        return(XML_ENC_ERR_INTERNAL);
583
0
    if (in == NULL) {
584
0
  *outlen = 0;
585
0
  *inlen = 0;
586
0
  return(0);
587
0
    }
588
0
    inend= in + *inlen;
589
0
    outend = out + (*outlen / 2);
590
0
    while (in < inend) {
591
0
      d= *in++;
592
0
      if      (d < 0x80)  { c= d; trailing= 0; }
593
0
      else if (d < 0xC0) {
594
          /* trailing byte in leading position */
595
0
    *outlen = (out - outstart) * 2;
596
0
    *inlen = processed - instart;
597
0
    return(XML_ENC_ERR_INPUT);
598
0
      } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
599
0
      else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
600
0
      else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
601
0
      else {
602
  /* no chance for this in UTF-16 */
603
0
  *outlen = (out - outstart) * 2;
604
0
  *inlen = processed - instart;
605
0
  return(XML_ENC_ERR_INPUT);
606
0
      }
607
608
0
      if (inend - in < trailing) {
609
0
          break;
610
0
      }
611
612
0
      for ( ; trailing; trailing--) {
613
0
          if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
614
0
        break;
615
0
          c <<= 6;
616
0
          c |= d & 0x3F;
617
0
      }
618
619
      /* assertion: c is a single UTF-4 value */
620
0
        if (c < 0x10000) {
621
0
            if (out >= outend)
622
0
          break;
623
0
      if (xmlLittleEndian) {
624
0
    *out++ = c;
625
0
      } else {
626
0
    tmp = (unsigned char *) out;
627
0
    *tmp = (unsigned char) c; /* Explicit truncation */
628
0
    *(tmp + 1) = c >> 8 ;
629
0
    out++;
630
0
      }
631
0
        }
632
0
        else if (c < 0x110000) {
633
0
            if (out+1 >= outend)
634
0
          break;
635
0
            c -= 0x10000;
636
0
      if (xmlLittleEndian) {
637
0
    *out++ = 0xD800 | (c >> 10);
638
0
    *out++ = 0xDC00 | (c & 0x03FF);
639
0
      } else {
640
0
    tmp1 = 0xD800 | (c >> 10);
641
0
    tmp = (unsigned char *) out;
642
0
    *tmp = (unsigned char) tmp1; /* Explicit truncation */
643
0
    *(tmp + 1) = tmp1 >> 8;
644
0
    out++;
645
646
0
    tmp2 = 0xDC00 | (c & 0x03FF);
647
0
    tmp = (unsigned char *) out;
648
0
    *tmp  = (unsigned char) tmp2; /* Explicit truncation */
649
0
    *(tmp + 1) = tmp2 >> 8;
650
0
    out++;
651
0
      }
652
0
        }
653
0
        else
654
0
      break;
655
0
  processed = in;
656
0
    }
657
0
    *outlen = (out - outstart) * 2;
658
0
    *inlen = processed - instart;
659
0
    return(*outlen);
660
0
}
661
662
/**
 * UTF8ToUTF16:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Convert a block of UTF-8 chars to UTF-16. When @in is NULL
 * (initialization) the little-endian byte order mark FF FE is written
 * if @outb has room for it; otherwise the data is handed to
 * UTF8ToUTF16LE().
 *
 * Returns the number of bytes written or an XML_ENC_ERR code.
 */
static int
UTF8ToUTF16(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    int written = 0;

    if (in != NULL)
        return (UTF8ToUTF16LE(outb, outlen, in, inlen));

    /*
     * initialization, add the Byte Order Mark for UTF-16LE
     */
    if (*outlen >= 2) {
        outb[0] = 0xFF;
        outb[1] = 0xFE;
        written = 2;
    }
    *outlen = written;
    *inlen = 0;
    return(written);
}
695
#endif /* LIBXML_OUTPUT_ENABLED */
696
697
/**
698
 * UTF16BEToUTF8:
699
 * @out:  a pointer to an array of bytes to store the result
700
 * @outlen:  the length of @out
701
 * @inb:  a pointer to an array of UTF-16 passed as a byte array
702
 * @inlenb:  the length of @in in UTF-16 chars
703
 *
704
 * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
705
 * block of chars out. This function assumes the endian property
706
 * is the same between the native type of this machine and the
707
 * inputed one.
708
 *
709
 * Returns the number of bytes written or an XML_ENC_ERR code.
710
 *
711
 * The value of *inlen after return is the number of octets consumed
712
 * if the return value is positive, else unpredictable.
713
 */
714
static int
715
UTF16BEToUTF8(unsigned char* out, int *outlen,
716
            const unsigned char* inb, int *inlenb)
717
0
{
718
0
    unsigned char* outstart = out;
719
0
    const unsigned char* processed = inb;
720
0
    unsigned char* outend;
721
0
    unsigned short* in = (unsigned short *) (void *) inb;
722
0
    unsigned short* inend;
723
0
    unsigned int c, d, inlen;
724
0
    unsigned char *tmp;
725
0
    int bits;
726
727
0
    if (*outlen == 0) {
728
0
        *inlenb = 0;
729
0
        return(0);
730
0
    }
731
0
    outend = out + *outlen;
732
0
    if ((*inlenb % 2) == 1)
733
0
        (*inlenb)--;
734
0
    inlen = *inlenb / 2;
735
0
    inend= in + inlen;
736
0
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
737
0
  if (xmlLittleEndian) {
738
0
      tmp = (unsigned char *) in;
739
0
      c = *tmp++;
740
0
      c = (c << 8) | *tmp;
741
0
      in++;
742
0
  } else {
743
0
      c= *in++;
744
0
  }
745
0
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
746
0
      if (in >= inend) {           /* handle split mutli-byte characters */
747
0
                break;
748
0
      }
749
0
      if (xmlLittleEndian) {
750
0
    tmp = (unsigned char *) in;
751
0
    d = *tmp++;
752
0
    d = (d << 8) | *tmp;
753
0
    in++;
754
0
      } else {
755
0
    d= *in++;
756
0
      }
757
0
            if ((d & 0xFC00) == 0xDC00) {
758
0
                c &= 0x03FF;
759
0
                c <<= 10;
760
0
                c |= d & 0x03FF;
761
0
                c += 0x10000;
762
0
            }
763
0
            else {
764
0
    *outlen = out - outstart;
765
0
    *inlenb = processed - inb;
766
0
          return(XML_ENC_ERR_INPUT);
767
0
      }
768
0
        }
769
770
  /* assertion: c is a single UTF-4 value */
771
0
        if (out >= outend)
772
0
      break;
773
0
        if      (c <    0x80) {  *out++=  c;                bits= -6; }
774
0
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
775
0
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
776
0
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
777
778
0
        for ( ; bits >= 0; bits-= 6) {
779
0
            if (out >= outend)
780
0
          break;
781
0
            *out++= ((c >> bits) & 0x3F) | 0x80;
782
0
        }
783
0
  processed = (const unsigned char*) in;
784
0
    }
785
0
    *outlen = out - outstart;
786
0
    *inlenb = processed - inb;
787
0
    return(*outlen);
788
0
}
789
790
#ifdef LIBXML_OUTPUT_ENABLED
791
/**
792
 * UTF8ToUTF16BE:
793
 * @outb:  a pointer to an array of bytes to store the result
794
 * @outlen:  the length of @outb
795
 * @in:  a pointer to an array of UTF-8 chars
796
 * @inlen:  the length of @in
797
 *
798
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
799
 * block of chars out.
800
 *
801
 * Returns the number of bytes written or an XML_ENC_ERR code.
802
 */
803
static int
804
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
805
            const unsigned char* in, int *inlen)
806
0
{
807
0
    unsigned short* out = (unsigned short *) (void *) outb;
808
0
    const unsigned char* processed = in;
809
0
    const unsigned char *const instart = in;
810
0
    unsigned short* outstart= out;
811
0
    unsigned short* outend;
812
0
    const unsigned char* inend;
813
0
    unsigned int c, d;
814
0
    int trailing;
815
0
    unsigned char *tmp;
816
0
    unsigned short tmp1, tmp2;
817
818
    /* UTF-16BE has no BOM */
819
0
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
820
0
        return(XML_ENC_ERR_INTERNAL);
821
0
    if (in == NULL) {
822
0
  *outlen = 0;
823
0
  *inlen = 0;
824
0
  return(0);
825
0
    }
826
0
    inend= in + *inlen;
827
0
    outend = out + (*outlen / 2);
828
0
    while (in < inend) {
829
0
      d= *in++;
830
0
      if      (d < 0x80)  { c= d; trailing= 0; }
831
0
      else if (d < 0xC0)  {
832
          /* trailing byte in leading position */
833
0
    *outlen = out - outstart;
834
0
    *inlen = processed - instart;
835
0
    return(XML_ENC_ERR_INPUT);
836
0
      } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
837
0
      else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
838
0
      else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
839
0
      else {
840
          /* no chance for this in UTF-16 */
841
0
    *outlen = out - outstart;
842
0
    *inlen = processed - instart;
843
0
    return(XML_ENC_ERR_INPUT);
844
0
      }
845
846
0
      if (inend - in < trailing) {
847
0
          break;
848
0
      }
849
850
0
      for ( ; trailing; trailing--) {
851
0
          if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))  break;
852
0
          c <<= 6;
853
0
          c |= d & 0x3F;
854
0
      }
855
856
      /* assertion: c is a single UTF-4 value */
857
0
        if (c < 0x10000) {
858
0
            if (out >= outend)  break;
859
0
      if (xmlLittleEndian) {
860
0
    tmp = (unsigned char *) out;
861
0
    *tmp = c >> 8;
862
0
    *(tmp + 1) = (unsigned char) c; /* Explicit truncation */
863
0
    out++;
864
0
      } else {
865
0
    *out++ = c;
866
0
      }
867
0
        }
868
0
        else if (c < 0x110000) {
869
0
            if (out+1 >= outend)  break;
870
0
            c -= 0x10000;
871
0
      if (xmlLittleEndian) {
872
0
    tmp1 = 0xD800 | (c >> 10);
873
0
    tmp = (unsigned char *) out;
874
0
    *tmp = tmp1 >> 8;
875
0
    *(tmp + 1) = (unsigned char) tmp1; /* Explicit truncation */
876
0
    out++;
877
878
0
    tmp2 = 0xDC00 | (c & 0x03FF);
879
0
    tmp = (unsigned char *) out;
880
0
    *tmp = tmp2 >> 8;
881
0
    *(tmp + 1) = (unsigned char) tmp2; /* Explicit truncation */
882
0
    out++;
883
0
      } else {
884
0
    *out++ = 0xD800 | (c >> 10);
885
0
    *out++ = 0xDC00 | (c & 0x03FF);
886
0
      }
887
0
        }
888
0
        else
889
0
      break;
890
0
  processed = in;
891
0
    }
892
0
    *outlen = (out - outstart) * 2;
893
0
    *inlen = processed - instart;
894
0
    return(*outlen);
895
0
}
896
#endif /* LIBXML_OUTPUT_ENABLED */
897
898
/************************************************************************
899
 *                  *
900
 *    Generic encoding handling routines      *
901
 *                  *
902
 ************************************************************************/
903
904
/**
905
 * xmlDetectCharEncoding:
906
 * @in:  a pointer to the first bytes of the XML entity, must be at least
907
 *       2 bytes long (at least 4 if encoding is UTF4 variant).
908
 * @len:  pointer to the length of the buffer
909
 *
910
 * Guess the encoding of the entity using the first bytes of the entity content
911
 * according to the non-normative appendix F of the XML-1.0 recommendation.
912
 *
913
 * Returns one of the XML_CHAR_ENCODING_... values.
914
 */
915
xmlCharEncoding
916
xmlDetectCharEncoding(const unsigned char* in, int len)
917
0
{
918
0
    if (in == NULL)
919
0
        return(XML_CHAR_ENCODING_NONE);
920
0
    if (len >= 4) {
921
0
  if ((in[0] == 0x00) && (in[1] == 0x00) &&
922
0
      (in[2] == 0x00) && (in[3] == 0x3C))
923
0
      return(XML_CHAR_ENCODING_UCS4BE);
924
0
  if ((in[0] == 0x3C) && (in[1] == 0x00) &&
925
0
      (in[2] == 0x00) && (in[3] == 0x00))
926
0
      return(XML_CHAR_ENCODING_UCS4LE);
927
0
  if ((in[0] == 0x00) && (in[1] == 0x00) &&
928
0
      (in[2] == 0x3C) && (in[3] == 0x00))
929
0
      return(XML_CHAR_ENCODING_UCS4_2143);
930
0
  if ((in[0] == 0x00) && (in[1] == 0x3C) &&
931
0
      (in[2] == 0x00) && (in[3] == 0x00))
932
0
      return(XML_CHAR_ENCODING_UCS4_3412);
933
0
  if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
934
0
      (in[2] == 0xA7) && (in[3] == 0x94))
935
0
      return(XML_CHAR_ENCODING_EBCDIC);
936
0
  if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
937
0
      (in[2] == 0x78) && (in[3] == 0x6D))
938
0
      return(XML_CHAR_ENCODING_UTF8);
939
  /*
940
   * Although not part of the recommendation, we also
941
   * attempt an "auto-recognition" of UTF-16LE and
942
   * UTF-16BE encodings.
943
   */
944
0
  if ((in[0] == 0x3C) && (in[1] == 0x00) &&
945
0
      (in[2] == 0x3F) && (in[3] == 0x00))
946
0
      return(XML_CHAR_ENCODING_UTF16LE);
947
0
  if ((in[0] == 0x00) && (in[1] == 0x3C) &&
948
0
      (in[2] == 0x00) && (in[3] == 0x3F))
949
0
      return(XML_CHAR_ENCODING_UTF16BE);
950
0
    }
951
0
    if (len >= 3) {
952
  /*
953
   * Errata on XML-1.0 June 20 2001
954
   * We now allow an UTF8 encoded BOM
955
   */
956
0
  if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
957
0
      (in[2] == 0xBF))
958
0
      return(XML_CHAR_ENCODING_UTF8);
959
0
    }
960
    /* For UTF-16 we can recognize by the BOM */
961
0
    if (len >= 2) {
962
0
  if ((in[0] == 0xFE) && (in[1] == 0xFF))
963
0
      return(XML_CHAR_ENCODING_UTF16BE);
964
0
  if ((in[0] == 0xFF) && (in[1] == 0xFE))
965
0
      return(XML_CHAR_ENCODING_UTF16LE);
966
0
    }
967
0
    return(XML_CHAR_ENCODING_NONE);
968
0
}
969
970
/**
971
 * xmlCleanupEncodingAliases:
972
 *
973
 * Unregisters all aliases
974
 */
975
void
976
0
xmlCleanupEncodingAliases(void) {
977
0
    int i;
978
979
0
    if (xmlCharEncodingAliases == NULL)
980
0
  return;
981
982
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
983
0
  if (xmlCharEncodingAliases[i].name != NULL)
984
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
985
0
  if (xmlCharEncodingAliases[i].alias != NULL)
986
0
      xmlFree((char *) xmlCharEncodingAliases[i].alias);
987
0
    }
988
0
    xmlCharEncodingAliasesNb = 0;
989
0
    xmlCharEncodingAliasesMax = 0;
990
0
    xmlFree(xmlCharEncodingAliases);
991
0
    xmlCharEncodingAliases = NULL;
992
0
}
993
994
/**
995
 * xmlGetEncodingAlias:
996
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
997
 *
998
 * Lookup an encoding name for the given alias.
999
 *
1000
 * Returns NULL if not found, otherwise the original name
1001
 */
1002
const char *
1003
0
xmlGetEncodingAlias(const char *alias) {
1004
0
    int i;
1005
0
    char upper[100];
1006
1007
0
    if (alias == NULL)
1008
0
  return(NULL);
1009
1010
0
    if (xmlCharEncodingAliases == NULL)
1011
0
  return(NULL);
1012
1013
0
    for (i = 0;i < 99;i++) {
1014
0
        upper[i] = (char) toupper((unsigned char) alias[i]);
1015
0
  if (upper[i] == 0) break;
1016
0
    }
1017
0
    upper[i] = 0;
1018
1019
    /*
1020
     * Walk down the list looking for a definition of the alias
1021
     */
1022
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1023
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1024
0
      return(xmlCharEncodingAliases[i].name);
1025
0
  }
1026
0
    }
1027
0
    return(NULL);
1028
0
}
1029
1030
/**
1031
 * xmlAddEncodingAlias:
1032
 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1033
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1034
 *
1035
 * Registers an alias @alias for an encoding named @name. Existing alias
1036
 * will be overwritten.
1037
 *
1038
 * Returns 0 in case of success, -1 in case of error
1039
 */
1040
int
1041
0
xmlAddEncodingAlias(const char *name, const char *alias) {
1042
0
    int i;
1043
0
    char upper[100];
1044
0
    char *nameCopy, *aliasCopy;
1045
1046
0
    if ((name == NULL) || (alias == NULL))
1047
0
  return(-1);
1048
1049
0
    for (i = 0;i < 99;i++) {
1050
0
        upper[i] = (char) toupper((unsigned char) alias[i]);
1051
0
  if (upper[i] == 0) break;
1052
0
    }
1053
0
    upper[i] = 0;
1054
1055
0
    if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1056
0
        xmlCharEncodingAliasPtr tmp;
1057
0
        size_t newSize = xmlCharEncodingAliasesMax ?
1058
0
                         xmlCharEncodingAliasesMax * 2 :
1059
0
                         20;
1060
1061
0
        tmp = (xmlCharEncodingAliasPtr)
1062
0
              xmlRealloc(xmlCharEncodingAliases,
1063
0
                         newSize * sizeof(xmlCharEncodingAlias));
1064
0
        if (tmp == NULL)
1065
0
            return(-1);
1066
0
        xmlCharEncodingAliases = tmp;
1067
0
        xmlCharEncodingAliasesMax = newSize;
1068
0
    }
1069
1070
    /*
1071
     * Walk down the list looking for a definition of the alias
1072
     */
1073
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1074
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1075
      /*
1076
       * Replace the definition.
1077
       */
1078
0
      nameCopy = xmlMemStrdup(name);
1079
0
            if (nameCopy == NULL)
1080
0
                return(-1);
1081
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1082
0
      xmlCharEncodingAliases[i].name = nameCopy;
1083
0
      return(0);
1084
0
  }
1085
0
    }
1086
    /*
1087
     * Add the definition
1088
     */
1089
0
    nameCopy = xmlMemStrdup(name);
1090
0
    if (nameCopy == NULL)
1091
0
        return(-1);
1092
0
    aliasCopy = xmlMemStrdup(upper);
1093
0
    if (aliasCopy == NULL) {
1094
0
        xmlFree(nameCopy);
1095
0
        return(-1);
1096
0
    }
1097
0
    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = nameCopy;
1098
0
    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = aliasCopy;
1099
0
    xmlCharEncodingAliasesNb++;
1100
0
    return(0);
1101
0
}
1102
1103
/**
1104
 * xmlDelEncodingAlias:
1105
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1106
 *
1107
 * Unregisters an encoding alias @alias
1108
 *
1109
 * Returns 0 in case of success, -1 in case of error
1110
 */
1111
int
1112
0
xmlDelEncodingAlias(const char *alias) {
1113
0
    int i;
1114
1115
0
    if (alias == NULL)
1116
0
  return(-1);
1117
1118
0
    if (xmlCharEncodingAliases == NULL)
1119
0
  return(-1);
1120
    /*
1121
     * Walk down the list looking for a definition of the alias
1122
     */
1123
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1124
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1125
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1126
0
      xmlFree((char *) xmlCharEncodingAliases[i].alias);
1127
0
      xmlCharEncodingAliasesNb--;
1128
0
      memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1129
0
        sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1130
0
      return(0);
1131
0
  }
1132
0
    }
1133
0
    return(-1);
1134
0
}
1135
1136
/**
1137
 * xmlParseCharEncoding:
1138
 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1139
 *
1140
 * Compare the string to the encoding schemes already known. Note
1141
 * that the comparison is case insensitive accordingly to the section
1142
 * [XML] 4.3.3 Character Encoding in Entities.
1143
 *
1144
 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1145
 * if not recognized.
1146
 */
1147
xmlCharEncoding
1148
xmlParseCharEncoding(const char* name)
1149
0
{
1150
0
    const char *alias;
1151
0
    char upper[500];
1152
0
    int i;
1153
1154
0
    if (name == NULL)
1155
0
  return(XML_CHAR_ENCODING_NONE);
1156
1157
    /*
1158
     * Do the alias resolution
1159
     */
1160
0
    alias = xmlGetEncodingAlias(name);
1161
0
    if (alias != NULL)
1162
0
  name = alias;
1163
1164
0
    for (i = 0;i < 499;i++) {
1165
0
        upper[i] = (char) toupper((unsigned char) name[i]);
1166
0
  if (upper[i] == 0) break;
1167
0
    }
1168
0
    upper[i] = 0;
1169
1170
0
    if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1171
0
    if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1172
0
    if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1173
1174
    /*
1175
     * NOTE: if we were able to parse this, the endianness of UTF16 is
1176
     *       already found and in use
1177
     */
1178
0
    if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1179
0
    if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1180
1181
0
    if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1182
0
    if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1183
0
    if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1184
1185
    /*
1186
     * NOTE: if we were able to parse this, the endianness of UCS4 is
1187
     *       already found and in use
1188
     */
1189
0
    if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1190
0
    if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1191
0
    if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1192
1193
1194
0
    if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1195
0
    if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1196
0
    if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1197
1198
0
    if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1199
0
    if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1200
0
    if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1201
1202
0
    if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1203
0
    if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1204
0
    if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1205
0
    if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1206
0
    if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1207
0
    if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1208
0
    if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1209
1210
0
    if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1211
0
    if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1212
0
    if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1213
1214
0
    return(XML_CHAR_ENCODING_ERROR);
1215
0
}
1216
1217
/**
1218
 * xmlGetCharEncodingName:
1219
 * @enc:  the encoding
1220
 *
1221
 * The "canonical" name for XML encoding.
1222
 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1223
 * Section 4.3.3  Character Encoding in Entities
1224
 *
1225
 * Returns the canonical name for the given encoding
1226
 */
1227
1228
const char*
1229
0
xmlGetCharEncodingName(xmlCharEncoding enc) {
1230
0
    switch (enc) {
1231
0
        case XML_CHAR_ENCODING_ERROR:
1232
0
      return(NULL);
1233
0
        case XML_CHAR_ENCODING_NONE:
1234
0
      return(NULL);
1235
0
        case XML_CHAR_ENCODING_UTF8:
1236
0
      return("UTF-8");
1237
0
        case XML_CHAR_ENCODING_UTF16LE:
1238
0
      return("UTF-16");
1239
0
        case XML_CHAR_ENCODING_UTF16BE:
1240
0
      return("UTF-16");
1241
0
        case XML_CHAR_ENCODING_EBCDIC:
1242
0
            return("EBCDIC");
1243
0
        case XML_CHAR_ENCODING_UCS4LE:
1244
0
            return("ISO-10646-UCS-4");
1245
0
        case XML_CHAR_ENCODING_UCS4BE:
1246
0
            return("ISO-10646-UCS-4");
1247
0
        case XML_CHAR_ENCODING_UCS4_2143:
1248
0
            return("ISO-10646-UCS-4");
1249
0
        case XML_CHAR_ENCODING_UCS4_3412:
1250
0
            return("ISO-10646-UCS-4");
1251
0
        case XML_CHAR_ENCODING_UCS2:
1252
0
            return("ISO-10646-UCS-2");
1253
0
        case XML_CHAR_ENCODING_8859_1:
1254
0
      return("ISO-8859-1");
1255
0
        case XML_CHAR_ENCODING_8859_2:
1256
0
      return("ISO-8859-2");
1257
0
        case XML_CHAR_ENCODING_8859_3:
1258
0
      return("ISO-8859-3");
1259
0
        case XML_CHAR_ENCODING_8859_4:
1260
0
      return("ISO-8859-4");
1261
0
        case XML_CHAR_ENCODING_8859_5:
1262
0
      return("ISO-8859-5");
1263
0
        case XML_CHAR_ENCODING_8859_6:
1264
0
      return("ISO-8859-6");
1265
0
        case XML_CHAR_ENCODING_8859_7:
1266
0
      return("ISO-8859-7");
1267
0
        case XML_CHAR_ENCODING_8859_8:
1268
0
      return("ISO-8859-8");
1269
0
        case XML_CHAR_ENCODING_8859_9:
1270
0
      return("ISO-8859-9");
1271
0
        case XML_CHAR_ENCODING_2022_JP:
1272
0
            return("ISO-2022-JP");
1273
0
        case XML_CHAR_ENCODING_SHIFT_JIS:
1274
0
            return("Shift-JIS");
1275
0
        case XML_CHAR_ENCODING_EUC_JP:
1276
0
            return("EUC-JP");
1277
0
  case XML_CHAR_ENCODING_ASCII:
1278
0
      return(NULL);
1279
0
    }
1280
0
    return(NULL);
1281
0
}
1282
1283
/************************************************************************
1284
 *                  *
1285
 *      Char encoding handlers        *
1286
 *                  *
1287
 ************************************************************************/
1288
1289
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) && \
1290
    defined(LIBXML_ISO8859X_ENABLED)
1291
1292
#define DECLARE_ISO_FUNCS(n) \
1293
    static int ISO8859_##n##ToUTF8(unsigned char* out, int *outlen, \
1294
                                   const unsigned char* in, int *inlen); \
1295
    static int UTF8ToISO8859_##n(unsigned char* out, int *outlen, \
1296
                                 const unsigned char* in, int *inlen);
1297
1298
/** DOC_DISABLE */
1299
DECLARE_ISO_FUNCS(2)
1300
DECLARE_ISO_FUNCS(3)
1301
DECLARE_ISO_FUNCS(4)
1302
DECLARE_ISO_FUNCS(5)
1303
DECLARE_ISO_FUNCS(6)
1304
DECLARE_ISO_FUNCS(7)
1305
DECLARE_ISO_FUNCS(8)
1306
DECLARE_ISO_FUNCS(9)
1307
DECLARE_ISO_FUNCS(10)
1308
DECLARE_ISO_FUNCS(11)
1309
DECLARE_ISO_FUNCS(13)
1310
DECLARE_ISO_FUNCS(14)
1311
DECLARE_ISO_FUNCS(15)
1312
DECLARE_ISO_FUNCS(16)
1313
/** DOC_ENABLE */
1314
1315
#endif /* LIBXML_ISO8859X_ENABLED */
1316
1317
#ifdef LIBXML_ICONV_ENABLED
1318
  #define EMPTY_ICONV , (iconv_t) 0, (iconv_t) 0
1319
#else
1320
  #define EMPTY_ICONV
1321
#endif
1322
1323
#ifdef LIBXML_ICU_ENABLED
1324
  #define EMPTY_UCONV , NULL, NULL
1325
#else
1326
  #define EMPTY_UCONV
1327
#endif
1328
1329
#define MAKE_HANDLER(name, in, out) \
1330
    { (char *) name, in, out EMPTY_ICONV EMPTY_UCONV }
1331
1332
/*
 * Table of the built-in encoding handlers. Each entry gives the handler
 * name, the function converting from that encoding to UTF-8 and the
 * function converting from UTF-8 to that encoding (NULL when that
 * direction is not available in this build configuration).
 *
 * The first three entries must stay "UTF-8", "UTF-16LE", "UTF-16BE" in
 * that order: xmlUTF16LEHandler and xmlUTF16BEHandler below refer to
 * slots 1 and 2 by index.
 */
static const xmlCharEncodingHandler defaultHandlers[] = {
1333
    MAKE_HANDLER("UTF-8", UTF8ToUTF8, UTF8ToUTF8)
1334
#ifdef LIBXML_OUTPUT_ENABLED
1335
    ,MAKE_HANDLER("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE)
1336
    ,MAKE_HANDLER("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE)
1337
    ,MAKE_HANDLER("UTF-16", UTF16LEToUTF8, UTF8ToUTF16)
1338
    ,MAKE_HANDLER("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1)
1339
    ,MAKE_HANDLER("ASCII", asciiToUTF8, UTF8Toascii)
1340
    ,MAKE_HANDLER("US-ASCII", asciiToUTF8, UTF8Toascii)
1341
#ifdef LIBXML_HTML_ENABLED
1342
    ,MAKE_HANDLER("HTML", NULL, UTF8ToHtml)
1343
#endif
1344
#else
1345
    ,MAKE_HANDLER("UTF-16LE", UTF16LEToUTF8, NULL)
1346
    ,MAKE_HANDLER("UTF-16BE", UTF16BEToUTF8, NULL)
1347
    ,MAKE_HANDLER("UTF-16", UTF16LEToUTF8, NULL)
1348
    ,MAKE_HANDLER("ISO-8859-1", isolat1ToUTF8, NULL)
1349
    ,MAKE_HANDLER("ASCII", asciiToUTF8, NULL)
1350
    ,MAKE_HANDLER("US-ASCII", asciiToUTF8, NULL)
1351
#endif /* LIBXML_OUTPUT_ENABLED */
1352
1353
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) && \
1354
    defined(LIBXML_ISO8859X_ENABLED)
1355
    ,MAKE_HANDLER("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2)
1356
    ,MAKE_HANDLER("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3)
1357
    ,MAKE_HANDLER("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4)
1358
    ,MAKE_HANDLER("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5)
1359
    ,MAKE_HANDLER("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6)
1360
    ,MAKE_HANDLER("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7)
1361
    ,MAKE_HANDLER("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8)
1362
    ,MAKE_HANDLER("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9)
1363
    ,MAKE_HANDLER("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10)
1364
    ,MAKE_HANDLER("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11)
1365
    ,MAKE_HANDLER("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13)
1366
    ,MAKE_HANDLER("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14)
1367
    ,MAKE_HANDLER("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15)
1368
    ,MAKE_HANDLER("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16)
1369
#endif
1370
};
1371
1372
/* Number of entries in defaultHandlers. */
#define NUM_DEFAULT_HANDLERS \
1373
0
    (sizeof(defaultHandlers) / sizeof(defaultHandlers[0]))
1374
1375
/* Slot 1 of defaultHandlers is the "UTF-16LE" entry, slot 2 "UTF-16BE". */
static const xmlCharEncodingHandler *xmlUTF16LEHandler = &defaultHandlers[1];
1376
static const xmlCharEncodingHandler *xmlUTF16BEHandler = &defaultHandlers[2];
1377
1378
/* the size should be growable, but it's not a big deal ... */
1379
0
#define MAX_ENCODING_HANDLERS 50
1380
static xmlCharEncodingHandlerPtr *handlers = NULL;
1381
static int nbCharEncodingHandler = 0;
1382
1383
/**
1384
 * xmlNewCharEncodingHandler:
1385
 * @name:  the encoding name, in UTF-8 format (ASCII actually)
1386
 * @input:  the xmlCharEncodingInputFunc to read that encoding
1387
 * @output:  the xmlCharEncodingOutputFunc to write that encoding
1388
 *
1389
 * Create and registers an xmlCharEncodingHandler.
1390
 *
1391
 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1392
 */
1393
xmlCharEncodingHandlerPtr
1394
xmlNewCharEncodingHandler(const char *name,
1395
                          xmlCharEncodingInputFunc input,
1396
0
                          xmlCharEncodingOutputFunc output) {
1397
0
    xmlCharEncodingHandlerPtr handler;
1398
0
    const char *alias;
1399
0
    char upper[500];
1400
0
    int i;
1401
0
    char *up = NULL;
1402
1403
    /*
1404
     * Do the alias resolution
1405
     */
1406
0
    alias = xmlGetEncodingAlias(name);
1407
0
    if (alias != NULL)
1408
0
  name = alias;
1409
1410
    /*
1411
     * Keep only the uppercase version of the encoding.
1412
     */
1413
0
    if (name == NULL)
1414
0
  return(NULL);
1415
0
    for (i = 0;i < 499;i++) {
1416
0
        upper[i] = (char) toupper((unsigned char) name[i]);
1417
0
  if (upper[i] == 0) break;
1418
0
    }
1419
0
    upper[i] = 0;
1420
0
    up = xmlMemStrdup(upper);
1421
0
    if (up == NULL)
1422
0
  return(NULL);
1423
1424
    /*
1425
     * allocate and fill-up an handler block.
1426
     */
1427
0
    handler = (xmlCharEncodingHandlerPtr)
1428
0
              xmlMalloc(sizeof(xmlCharEncodingHandler));
1429
0
    if (handler == NULL) {
1430
0
        xmlFree(up);
1431
0
  return(NULL);
1432
0
    }
1433
0
    memset(handler, 0, sizeof(xmlCharEncodingHandler));
1434
0
    handler->input = input;
1435
0
    handler->output = output;
1436
0
    handler->name = up;
1437
1438
0
#ifdef LIBXML_ICONV_ENABLED
1439
0
    handler->iconv_in = NULL;
1440
0
    handler->iconv_out = NULL;
1441
0
#endif
1442
#ifdef LIBXML_ICU_ENABLED
1443
    handler->uconv_in = NULL;
1444
    handler->uconv_out = NULL;
1445
#endif
1446
1447
    /*
1448
     * registers and returns the handler.
1449
     */
1450
0
    xmlRegisterCharEncodingHandler(handler);
1451
0
    return(handler);
1452
0
}
1453
1454
/**
1455
 * xmlInitCharEncodingHandlers:
1456
 *
1457
 * DEPRECATED: Alias for xmlInitParser.
 *
 * This function does no work of its own; it only forwards to
 * xmlInitParser().
1458
 */
1459
void
1460
0
xmlInitCharEncodingHandlers(void) {
1461
0
    xmlInitParser();
1462
0
}
1463
1464
/**
1465
 * xmlInitEncodingInternal:
1466
 *
1467
 * Initialize the char encoding support.
1468
 */
1469
void
1470
11
xmlInitEncodingInternal(void) {
1471
11
    unsigned short int tst = 0x1234;
1472
11
    unsigned char *ptr = (unsigned char *) &tst;
1473
1474
11
    if (*ptr == 0x12) xmlLittleEndian = 0;
1475
11
    else xmlLittleEndian = 1;
1476
11
}
1477
1478
/**
1479
 * xmlCleanupCharEncodingHandlers:
1480
 *
1481
 * DEPRECATED: This function will be made private. Call xmlCleanupParser
1482
 * to free global state but see the warnings there. xmlCleanupParser
1483
 * should be only called once at program exit. In most cases, you don't
1484
 * have call cleanup functions at all.
1485
 *
1486
 * Cleanup the memory allocated for the char encoding support, it
1487
 * unregisters all the encoding handlers and the aliases.
1488
 */
1489
void
1490
0
xmlCleanupCharEncodingHandlers(void) {
1491
0
    xmlCleanupEncodingAliases();
1492
1493
0
    if (handlers == NULL) return;
1494
1495
0
    for (;nbCharEncodingHandler > 0;) {
1496
0
        nbCharEncodingHandler--;
1497
0
  if (handlers[nbCharEncodingHandler] != NULL) {
1498
0
      if (handlers[nbCharEncodingHandler]->name != NULL)
1499
0
    xmlFree(handlers[nbCharEncodingHandler]->name);
1500
0
      xmlFree(handlers[nbCharEncodingHandler]);
1501
0
  }
1502
0
    }
1503
0
    xmlFree(handlers);
1504
0
    handlers = NULL;
1505
0
    nbCharEncodingHandler = 0;
1506
0
}
1507
1508
/**
1509
 * xmlRegisterCharEncodingHandler:
1510
 * @handler:  the xmlCharEncodingHandlerPtr handler block
1511
 *
1512
 * Register the char encoding handler, surprising, isn't it ?
1513
 */
1514
void
1515
0
xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1516
0
    if (handler == NULL)
1517
0
        return;
1518
0
    if (handlers == NULL) {
1519
0
        handlers = xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(handlers[0]));
1520
0
        if (handlers == NULL)
1521
0
            goto free_handler;
1522
0
    }
1523
1524
0
    if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS)
1525
0
        goto free_handler;
1526
0
    handlers[nbCharEncodingHandler++] = handler;
1527
0
    return;
1528
1529
0
free_handler:
1530
0
    if (handler != NULL) {
1531
0
        if (handler->name != NULL) {
1532
0
            xmlFree(handler->name);
1533
0
        }
1534
0
        xmlFree(handler);
1535
0
    }
1536
0
}
1537
1538
/**
1539
 * xmlGetCharEncodingHandler:
1540
 * @enc:  an xmlCharEncoding value.
1541
 *
1542
 * Search in the registered set the handler able to read/write that encoding.
1543
 *
1544
 * Returns the handler or NULL if not found
1545
 */
1546
xmlCharEncodingHandlerPtr
1547
11
xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1548
11
    xmlCharEncodingHandlerPtr handler;
1549
1550
11
    switch (enc) {
1551
0
        case XML_CHAR_ENCODING_ERROR:
1552
0
      return(NULL);
1553
11
        case XML_CHAR_ENCODING_NONE:
1554
11
      return(NULL);
1555
0
        case XML_CHAR_ENCODING_UTF8:
1556
0
      return(NULL);
1557
0
        case XML_CHAR_ENCODING_UTF16LE:
1558
0
      return((xmlCharEncodingHandlerPtr) xmlUTF16LEHandler);
1559
0
        case XML_CHAR_ENCODING_UTF16BE:
1560
0
      return((xmlCharEncodingHandlerPtr) xmlUTF16BEHandler);
1561
0
        case XML_CHAR_ENCODING_EBCDIC:
1562
0
            handler = xmlFindCharEncodingHandler("EBCDIC");
1563
0
            if (handler != NULL) return(handler);
1564
0
            handler = xmlFindCharEncodingHandler("ebcdic");
1565
0
            if (handler != NULL) return(handler);
1566
0
            handler = xmlFindCharEncodingHandler("EBCDIC-US");
1567
0
            if (handler != NULL) return(handler);
1568
0
            handler = xmlFindCharEncodingHandler("IBM-037");
1569
0
            if (handler != NULL) return(handler);
1570
0
      break;
1571
0
        case XML_CHAR_ENCODING_UCS4BE:
1572
0
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1573
0
            if (handler != NULL) return(handler);
1574
0
            handler = xmlFindCharEncodingHandler("UCS-4");
1575
0
            if (handler != NULL) return(handler);
1576
0
            handler = xmlFindCharEncodingHandler("UCS4");
1577
0
            if (handler != NULL) return(handler);
1578
0
      break;
1579
0
        case XML_CHAR_ENCODING_UCS4LE:
1580
0
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1581
0
            if (handler != NULL) return(handler);
1582
0
            handler = xmlFindCharEncodingHandler("UCS-4");
1583
0
            if (handler != NULL) return(handler);
1584
0
            handler = xmlFindCharEncodingHandler("UCS4");
1585
0
            if (handler != NULL) return(handler);
1586
0
      break;
1587
0
        case XML_CHAR_ENCODING_UCS4_2143:
1588
0
      break;
1589
0
        case XML_CHAR_ENCODING_UCS4_3412:
1590
0
      break;
1591
0
        case XML_CHAR_ENCODING_UCS2:
1592
0
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1593
0
            if (handler != NULL) return(handler);
1594
0
            handler = xmlFindCharEncodingHandler("UCS-2");
1595
0
            if (handler != NULL) return(handler);
1596
0
            handler = xmlFindCharEncodingHandler("UCS2");
1597
0
            if (handler != NULL) return(handler);
1598
0
      break;
1599
1600
      /*
1601
       * We used to keep ISO Latin encodings native in the
1602
       * generated data. This led to so many problems that
1603
       * this has been removed. One can still change this
1604
       * back by registering no-ops encoders for those
1605
       */
1606
0
        case XML_CHAR_ENCODING_8859_1:
1607
0
      handler = xmlFindCharEncodingHandler("ISO-8859-1");
1608
0
      if (handler != NULL) return(handler);
1609
0
      break;
1610
0
        case XML_CHAR_ENCODING_8859_2:
1611
0
      handler = xmlFindCharEncodingHandler("ISO-8859-2");
1612
0
      if (handler != NULL) return(handler);
1613
0
      break;
1614
0
        case XML_CHAR_ENCODING_8859_3:
1615
0
      handler = xmlFindCharEncodingHandler("ISO-8859-3");
1616
0
      if (handler != NULL) return(handler);
1617
0
      break;
1618
0
        case XML_CHAR_ENCODING_8859_4:
1619
0
      handler = xmlFindCharEncodingHandler("ISO-8859-4");
1620
0
      if (handler != NULL) return(handler);
1621
0
      break;
1622
0
        case XML_CHAR_ENCODING_8859_5:
1623
0
      handler = xmlFindCharEncodingHandler("ISO-8859-5");
1624
0
      if (handler != NULL) return(handler);
1625
0
      break;
1626
0
        case XML_CHAR_ENCODING_8859_6:
1627
0
      handler = xmlFindCharEncodingHandler("ISO-8859-6");
1628
0
      if (handler != NULL) return(handler);
1629
0
      break;
1630
0
        case XML_CHAR_ENCODING_8859_7:
1631
0
      handler = xmlFindCharEncodingHandler("ISO-8859-7");
1632
0
      if (handler != NULL) return(handler);
1633
0
      break;
1634
0
        case XML_CHAR_ENCODING_8859_8:
1635
0
      handler = xmlFindCharEncodingHandler("ISO-8859-8");
1636
0
      if (handler != NULL) return(handler);
1637
0
      break;
1638
0
        case XML_CHAR_ENCODING_8859_9:
1639
0
      handler = xmlFindCharEncodingHandler("ISO-8859-9");
1640
0
      if (handler != NULL) return(handler);
1641
0
      break;
1642
1643
1644
0
        case XML_CHAR_ENCODING_2022_JP:
1645
0
            handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1646
0
            if (handler != NULL) return(handler);
1647
0
      break;
1648
0
        case XML_CHAR_ENCODING_SHIFT_JIS:
1649
0
            handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1650
0
            if (handler != NULL) return(handler);
1651
0
            handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1652
0
            if (handler != NULL) return(handler);
1653
0
            handler = xmlFindCharEncodingHandler("Shift_JIS");
1654
0
            if (handler != NULL) return(handler);
1655
0
      break;
1656
0
        case XML_CHAR_ENCODING_EUC_JP:
1657
0
            handler = xmlFindCharEncodingHandler("EUC-JP");
1658
0
            if (handler != NULL) return(handler);
1659
0
      break;
1660
0
  default:
1661
0
      break;
1662
11
    }
1663
1664
0
    return(NULL);
1665
11
}
1666
1667
/**
1668
 * xmlFindCharEncodingHandler:
1669
 * @name:  a string describing the char encoding.
1670
 *
1671
 * Search in the registered set the handler able to read/write that encoding
1672
 * or create a new one.
1673
 *
1674
 * Returns the handler or NULL if not found
1675
 */
1676
xmlCharEncodingHandlerPtr
1677
0
xmlFindCharEncodingHandler(const char *name) {
1678
0
    const char *nalias;
1679
0
    const char *norig;
1680
0
    xmlCharEncoding alias;
1681
0
#ifdef LIBXML_ICONV_ENABLED
1682
0
    xmlCharEncodingHandlerPtr enc;
1683
0
    iconv_t icv_in, icv_out;
1684
0
#endif /* LIBXML_ICONV_ENABLED */
1685
#ifdef LIBXML_ICU_ENABLED
1686
    xmlCharEncodingHandlerPtr encu;
1687
    uconv_t *ucv_in, *ucv_out;
1688
#endif /* LIBXML_ICU_ENABLED */
1689
0
    char upper[100];
1690
0
    int i;
1691
1692
0
    if (name == NULL) return(NULL);
1693
0
    if (name[0] == 0) return(NULL);
1694
1695
    /*
1696
     * Do the alias resolution
1697
     */
1698
0
    norig = name;
1699
0
    nalias = xmlGetEncodingAlias(name);
1700
0
    if (nalias != NULL)
1701
0
  name = nalias;
1702
1703
    /*
1704
     * Check first for directly registered encoding names
1705
     */
1706
0
    for (i = 0;i < 99;i++) {
1707
0
        upper[i] = (char) toupper((unsigned char) name[i]);
1708
0
  if (upper[i] == 0) break;
1709
0
    }
1710
0
    upper[i] = 0;
1711
1712
0
    for (i = 0; i < (int) NUM_DEFAULT_HANDLERS; i++) {
1713
0
        if (strcmp(upper, defaultHandlers[i].name) == 0)
1714
0
            return((xmlCharEncodingHandlerPtr) &defaultHandlers[i]);
1715
0
    }
1716
1717
0
    if (handlers != NULL) {
1718
0
        for (i = 0;i < nbCharEncodingHandler; i++) {
1719
0
            if (!strcmp(upper, handlers[i]->name)) {
1720
0
                return(handlers[i]);
1721
0
            }
1722
0
        }
1723
0
    }
1724
1725
0
#ifdef LIBXML_ICONV_ENABLED
1726
    /* check whether iconv can handle this */
1727
0
    icv_in = iconv_open("UTF-8", name);
1728
0
    icv_out = iconv_open(name, "UTF-8");
1729
0
    if (icv_in == (iconv_t) -1) {
1730
0
        icv_in = iconv_open("UTF-8", upper);
1731
0
    }
1732
0
    if (icv_out == (iconv_t) -1) {
1733
0
  icv_out = iconv_open(upper, "UTF-8");
1734
0
    }
1735
0
    if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1736
0
      enc = (xmlCharEncodingHandlerPtr)
1737
0
            xmlMalloc(sizeof(xmlCharEncodingHandler));
1738
0
      if (enc == NULL) {
1739
0
          iconv_close(icv_in);
1740
0
          iconv_close(icv_out);
1741
0
    return(NULL);
1742
0
      }
1743
0
            memset(enc, 0, sizeof(xmlCharEncodingHandler));
1744
0
      enc->name = xmlMemStrdup(name);
1745
0
            if (enc->name == NULL) {
1746
0
                xmlFree(enc);
1747
0
                iconv_close(icv_in);
1748
0
                iconv_close(icv_out);
1749
0
                return(NULL);
1750
0
            }
1751
0
      enc->input = NULL;
1752
0
      enc->output = NULL;
1753
0
      enc->iconv_in = icv_in;
1754
0
      enc->iconv_out = icv_out;
1755
0
      return enc;
1756
0
    } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1757
0
      if (icv_in != (iconv_t) -1)
1758
0
    iconv_close(icv_in);
1759
0
      else
1760
0
    iconv_close(icv_out);
1761
0
    }
1762
0
#endif /* LIBXML_ICONV_ENABLED */
1763
#ifdef LIBXML_ICU_ENABLED
1764
    /* check whether icu can handle this */
1765
    ucv_in = openIcuConverter(name, 1);
1766
    ucv_out = openIcuConverter(name, 0);
1767
    if (ucv_in != NULL && ucv_out != NULL) {
1768
      encu = (xmlCharEncodingHandlerPtr)
1769
             xmlMalloc(sizeof(xmlCharEncodingHandler));
1770
      if (encu == NULL) {
1771
                closeIcuConverter(ucv_in);
1772
                closeIcuConverter(ucv_out);
1773
    return(NULL);
1774
      }
1775
            memset(encu, 0, sizeof(xmlCharEncodingHandler));
1776
      encu->name = xmlMemStrdup(name);
1777
            if (encu->name == NULL) {
1778
                xmlFree(encu);
1779
                closeIcuConverter(ucv_in);
1780
                closeIcuConverter(ucv_out);
1781
                return(NULL);
1782
            }
1783
      encu->input = NULL;
1784
      encu->output = NULL;
1785
      encu->uconv_in = ucv_in;
1786
      encu->uconv_out = ucv_out;
1787
      return encu;
1788
    } else if (ucv_in != NULL || ucv_out != NULL) {
1789
            closeIcuConverter(ucv_in);
1790
            closeIcuConverter(ucv_out);
1791
    }
1792
#endif /* LIBXML_ICU_ENABLED */
1793
1794
    /*
1795
     * Fallback using the canonical names
1796
     */
1797
0
    alias = xmlParseCharEncoding(norig);
1798
0
    if (alias != XML_CHAR_ENCODING_ERROR) {
1799
0
        const char* canon;
1800
0
        canon = xmlGetCharEncodingName(alias);
1801
0
        if ((canon != NULL) && (strcmp(name, canon))) {
1802
0
      return(xmlFindCharEncodingHandler(canon));
1803
0
        }
1804
0
    }
1805
1806
    /* If "none of the above", give up */
1807
0
    return(NULL);
1808
0
}
1809
1810
/************************************************************************
1811
 *                  *
1812
 *    ICONV based generic conversion functions    *
1813
 *                  *
1814
 ************************************************************************/
1815
1816
#ifdef LIBXML_ICONV_ENABLED
1817
/**
1818
 * xmlIconvWrapper:
1819
 * @cd:   iconv converter data structure
1820
 * @out:  a pointer to an array of bytes to store the result
1821
 * @outlen:  the length of @out
1822
 * @in:  a pointer to an array of input bytes
1823
 * @inlen:  the length of @in
1824
 *
1825
 * Returns an XML_ENC_ERR code.
1826
 *
1827
 * The value of @inlen after return is the number of octets consumed
1828
 *     as the return value is positive, else unpredictable.
1829
 * The value of @outlen after return is the number of octets produced.
1830
 */
1831
static int
1832
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
1833
0
                const unsigned char *in, int *inlen) {
1834
0
    size_t icv_inlen, icv_outlen;
1835
0
    const char *icv_in = (const char *) in;
1836
0
    char *icv_out = (char *) out;
1837
0
    size_t ret;
1838
1839
0
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1840
0
        if (outlen != NULL) *outlen = 0;
1841
0
        return(XML_ENC_ERR_INTERNAL);
1842
0
    }
1843
0
    icv_inlen = *inlen;
1844
0
    icv_outlen = *outlen;
1845
    /*
1846
     * Some versions take const, other versions take non-const input.
1847
     */
1848
0
    ret = iconv(cd, (void *) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
1849
0
    *inlen -= icv_inlen;
1850
0
    *outlen -= icv_outlen;
1851
0
    if (ret == (size_t) -1) {
1852
0
        if (errno == EILSEQ)
1853
0
            return(XML_ENC_ERR_INPUT);
1854
0
        if (errno == E2BIG)
1855
0
            return(XML_ENC_ERR_SPACE);
1856
0
        if (errno == EINVAL)
1857
0
            return(XML_ENC_ERR_PARTIAL);
1858
0
        return(XML_ENC_ERR_INTERNAL);
1859
0
    }
1860
0
    return(XML_ENC_ERR_SUCCESS);
1861
0
}
1862
#endif /* LIBXML_ICONV_ENABLED */
1863
1864
/************************************************************************
1865
 *                  *
1866
 *    ICU based generic conversion functions    *
1867
 *                  *
1868
 ************************************************************************/
1869
1870
#ifdef LIBXML_ICU_ENABLED
1871
/**
1872
 * xmlUconvWrapper:
1873
 * @cd: ICU uconverter data structure
1874
 * @toUnicode : non-zero if toUnicode. 0 otherwise.
1875
 * @out:  a pointer to an array of bytes to store the result
1876
 * @outlen:  the length of @out
1877
 * @in:  a pointer to an array of input bytes
1878
 * @inlen:  the length of @in
1879
 *
1880
 * Returns an XML_ENC_ERR code.
1881
 *
1882
 * The value of @inlen after return is the number of octets consumed
1883
 *     as the return value is positive, else unpredictable.
1884
 * The value of @outlen after return is the number of octets produced.
1885
 */
1886
static int
1887
xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
1888
                const unsigned char *in, int *inlen) {
1889
    const char *ucv_in = (const char *) in;
1890
    char *ucv_out = (char *) out;
1891
    UErrorCode err = U_ZERO_ERROR;
1892
1893
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1894
        if (outlen != NULL) *outlen = 0;
1895
        return(XML_ENC_ERR_INTERNAL);
1896
    }
1897
1898
    /*
1899
     * Note that the ICU API is stateful. It can always consume a certain
1900
     * amount of input even if the output buffer would overflow. The
1901
     * remaining input must be processed by calling ucnv_convertEx with a
1902
     * possibly empty input buffer.
1903
     *
1904
     * ucnv_convertEx is always called with reset and flush set to 0,
1905
     * so we don't mess up the state. This should never generate
1906
     * U_TRUNCATED_CHAR_FOUND errors.
1907
     *
1908
     * This also means that ICU xmlCharEncodingHandlers should never be
1909
     * reused. It would be a lot nicer if there was a way to emulate the
1910
     * stateless iconv API.
1911
     */
1912
    if (toUnicode) {
1913
        /* encoding => UTF-16 => UTF-8 */
1914
        ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
1915
                       &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1916
                       &cd->pivot_source, &cd->pivot_target,
1917
                       cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, 0, &err);
1918
    } else {
1919
        /* UTF-8 => UTF-16 => encoding */
1920
        ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
1921
                       &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1922
                       &cd->pivot_source, &cd->pivot_target,
1923
                       cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, 0, &err);
1924
    }
1925
    *inlen = ucv_in - (const char*) in;
1926
    *outlen = ucv_out - (char *) out;
1927
    if (U_SUCCESS(err)) {
1928
        return(XML_ENC_ERR_SUCCESS);
1929
    }
1930
    if (err == U_BUFFER_OVERFLOW_ERROR)
1931
        return(XML_ENC_ERR_SPACE);
1932
    if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
1933
        return(XML_ENC_ERR_INPUT);
1934
    return(XML_ENC_ERR_PARTIAL);
1935
}
1936
#endif /* LIBXML_ICU_ENABLED */
1937
1938
/************************************************************************
1939
 *                  *
1940
 *    The real API used by libxml for on-the-fly conversion *
1941
 *                  *
1942
 ************************************************************************/
1943
1944
/**
1945
 * xmlEncConvertError:
1946
 * @code:  XML_ENC_ERR code
1947
 *
1948
 * Convert XML_ENC_ERR to libxml2 error codes.
1949
 */
1950
static int
1951
0
xmlEncConvertError(int code) {
1952
0
    int ret;
1953
1954
0
    switch (code) {
1955
0
        case XML_ENC_ERR_SUCCESS:
1956
0
            ret = XML_ERR_OK;
1957
0
            break;
1958
0
        case XML_ENC_ERR_INPUT:
1959
0
            ret = XML_ERR_INVALID_ENCODING;
1960
0
            break;
1961
0
        case XML_ENC_ERR_MEMORY:
1962
0
            ret = XML_ERR_NO_MEMORY;
1963
0
            break;
1964
0
        default:
1965
0
            ret = XML_ERR_INTERNAL_ERROR;
1966
0
            break;
1967
0
    }
1968
1969
0
    return(ret);
1970
0
}
1971
1972
/**
1973
 * xmlEncInputChunk:
1974
 * @handler:  encoding handler
1975
 * @out:  a pointer to an array of bytes to store the result
1976
 * @outlen:  the length of @out
1977
 * @in:  a pointer to an array of input bytes
1978
 * @inlen:  the length of @in
1979
 *
1980
 * The value of @inlen after return is the number of octets consumed
1981
 *     as the return value is 0, else unpredictable.
1982
 * The value of @outlen after return is the number of octets produced.
1983
 *
1984
 * Returns an XML_ENC_ERR code.
1985
 */
1986
int
1987
xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
1988
0
                 int *outlen, const unsigned char *in, int *inlen) {
1989
0
    int ret;
1990
1991
0
    if (handler->input != NULL) {
1992
0
        int oldinlen = *inlen;
1993
1994
0
        ret = handler->input(out, outlen, in, inlen);
1995
0
        if (ret >= 0) {
1996
            /*
1997
             * The built-in converters don't signal XML_ENC_ERR_SPACE.
1998
             */
1999
0
            if (*inlen < oldinlen) {
2000
0
                if (*outlen > 0)
2001
0
                    ret = XML_ENC_ERR_SPACE;
2002
0
                else
2003
0
                    ret = XML_ENC_ERR_PARTIAL;
2004
0
            } else {
2005
0
                ret = XML_ENC_ERR_SUCCESS;
2006
0
            }
2007
0
        }
2008
0
    }
2009
0
#ifdef LIBXML_ICONV_ENABLED
2010
0
    else if (handler->iconv_in != NULL) {
2011
0
        ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
2012
0
    }
2013
0
#endif /* LIBXML_ICONV_ENABLED */
2014
#ifdef LIBXML_ICU_ENABLED
2015
    else if (handler->uconv_in != NULL) {
2016
        ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen);
2017
    }
2018
#endif /* LIBXML_ICU_ENABLED */
2019
0
    else {
2020
0
        *outlen = 0;
2021
0
        *inlen = 0;
2022
0
        ret = XML_ENC_ERR_INTERNAL;
2023
0
    }
2024
2025
    /* Ignore partial errors when reading. */
2026
0
    if (ret == XML_ENC_ERR_PARTIAL)
2027
0
        ret = XML_ENC_ERR_SUCCESS;
2028
2029
0
    return(ret);
2030
0
}
2031
2032
/**
2033
 * xmlEncOutputChunk:
2034
 * @handler:  encoding handler
2035
 * @out:  a pointer to an array of bytes to store the result
2036
 * @outlen:  the length of @out
2037
 * @in:  a pointer to an array of input bytes
2038
 * @inlen:  the length of @in
2039
 *
2040
 * Returns an XML_ENC_ERR code.
2041
 *
2042
 * The value of @inlen after return is the number of octets consumed
2043
 *     as the return value is 0, else unpredictable.
2044
 * The value of @outlen after return is the number of octets produced.
2045
 */
2046
static int
2047
xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2048
0
                  int *outlen, const unsigned char *in, int *inlen) {
2049
0
    int ret;
2050
2051
0
    if (handler->output != NULL) {
2052
0
        int oldinlen = *inlen;
2053
2054
0
        ret = handler->output(out, outlen, in, inlen);
2055
0
        if (ret >= 0) {
2056
            /*
2057
             * The built-in converters don't signal XML_ENC_ERR_SPACE.
2058
             */
2059
0
            if (*inlen < oldinlen) {
2060
0
                if (*outlen > 0)
2061
0
                    ret = XML_ENC_ERR_SPACE;
2062
0
                else
2063
0
                    ret = XML_ENC_ERR_PARTIAL;
2064
0
            } else {
2065
0
                ret = XML_ENC_ERR_SUCCESS;
2066
0
            }
2067
0
        }
2068
0
    }
2069
0
#ifdef LIBXML_ICONV_ENABLED
2070
0
    else if (handler->iconv_out != NULL) {
2071
0
        ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
2072
0
    }
2073
0
#endif /* LIBXML_ICONV_ENABLED */
2074
#ifdef LIBXML_ICU_ENABLED
2075
    else if (handler->uconv_out != NULL) {
2076
        ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen);
2077
    }
2078
#endif /* LIBXML_ICU_ENABLED */
2079
0
    else {
2080
0
        *outlen = 0;
2081
0
        *inlen = 0;
2082
0
        ret = XML_ENC_ERR_INTERNAL;
2083
0
    }
2084
2085
    /* We shouldn't generate partial sequences when writing. */
2086
0
    if (ret == XML_ENC_ERR_PARTIAL)
2087
0
        ret = XML_ENC_ERR_INTERNAL;
2088
2089
0
    return(ret);
2090
0
}
2091
2092
/**
2093
 * xmlCharEncFirstLine:
2094
 * @handler:   char encoding transformation data structure
2095
 * @out:  an xmlBuffer for the output.
2096
 * @in:  an xmlBuffer for the input
2097
 *
2098
 * DEPERECATED: Don't use.
2099
 *
2100
 * Returns the number of bytes written or an XML_ENC_ERR code.
2101
 */
2102
int
2103
xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2104
0
                    xmlBufferPtr in) {
2105
0
    return(xmlCharEncInFunc(handler, out, in));
2106
0
}
2107
2108
/**
2109
 * xmlCharEncInput:
2110
 * @input: a parser input buffer
2111
 *
2112
 * Generic front-end for the encoding handler on parser input
2113
 *
2114
 * Returns the number of bytes written or an XML_ENC_ERR code.
2115
 */
2116
int
2117
xmlCharEncInput(xmlParserInputBufferPtr input)
2118
0
{
2119
0
    int ret;
2120
0
    size_t avail;
2121
0
    size_t toconv;
2122
0
    int c_in;
2123
0
    int c_out;
2124
0
    xmlBufPtr in;
2125
0
    xmlBufPtr out;
2126
0
    const xmlChar *inData;
2127
0
    size_t inTotal = 0;
2128
2129
0
    if ((input == NULL) || (input->encoder == NULL) ||
2130
0
        (input->buffer == NULL) || (input->raw == NULL))
2131
0
        return(XML_ENC_ERR_INTERNAL);
2132
0
    out = input->buffer;
2133
0
    in = input->raw;
2134
2135
0
    toconv = xmlBufUse(in);
2136
0
    if (toconv == 0)
2137
0
        return (0);
2138
0
    inData = xmlBufContent(in);
2139
0
    inTotal = 0;
2140
2141
0
    do {
2142
0
        c_in = toconv > INT_MAX / 2 ? INT_MAX / 2 : toconv;
2143
2144
0
        avail = xmlBufAvail(out);
2145
0
        if (avail > INT_MAX)
2146
0
            avail = INT_MAX;
2147
0
        if (avail < 4096) {
2148
0
            if (xmlBufGrow(out, 4096) < 0) {
2149
0
                input->error = XML_ERR_NO_MEMORY;
2150
0
                return(XML_ENC_ERR_MEMORY);
2151
0
            }
2152
0
            avail = xmlBufAvail(out);
2153
0
        }
2154
2155
0
        c_in = toconv;
2156
0
        c_out = avail;
2157
0
        ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2158
0
                               inData, &c_in);
2159
0
        inTotal += c_in;
2160
0
        inData += c_in;
2161
0
        toconv -= c_in;
2162
0
        xmlBufAddLen(out, c_out);
2163
0
    } while (ret == XML_ENC_ERR_SPACE);
2164
2165
0
    xmlBufShrink(in, inTotal);
2166
2167
0
    if (input->rawconsumed > ULONG_MAX - (unsigned long)c_in)
2168
0
        input->rawconsumed = ULONG_MAX;
2169
0
    else
2170
0
        input->rawconsumed += c_in;
2171
2172
0
    if ((c_out == 0) && (ret != 0)) {
2173
0
        if (input->error == 0)
2174
0
            input->error = xmlEncConvertError(ret);
2175
0
        return(ret);
2176
0
    }
2177
2178
0
    return (c_out);
2179
0
}
2180
2181
/**
2182
 * xmlCharEncInFunc:
2183
 * @handler:  char encoding transformation data structure
2184
 * @out:  an xmlBuffer for the output.
2185
 * @in:  an xmlBuffer for the input
2186
 *
2187
 * Generic front-end for the encoding handler input function
2188
 *
2189
 * Returns the number of bytes written or an XML_ENC_ERR code.
2190
 */
2191
int
2192
xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2193
                 xmlBufferPtr in)
2194
0
{
2195
0
    int ret;
2196
0
    int written;
2197
0
    int toconv;
2198
2199
0
    if (handler == NULL)
2200
0
        return(XML_ENC_ERR_INTERNAL);
2201
0
    if (out == NULL)
2202
0
        return(XML_ENC_ERR_INTERNAL);
2203
0
    if (in == NULL)
2204
0
        return(XML_ENC_ERR_INTERNAL);
2205
2206
0
    toconv = in->use;
2207
0
    if (toconv == 0)
2208
0
        return (0);
2209
0
    written = out->size - out->use -1; /* count '\0' */
2210
0
    if (toconv * 2 >= written) {
2211
0
        xmlBufferGrow(out, out->size + toconv * 2);
2212
0
        written = out->size - out->use - 1;
2213
0
    }
2214
0
    ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2215
0
                           in->content, &toconv);
2216
0
    xmlBufferShrink(in, toconv);
2217
0
    out->use += written;
2218
0
    out->content[out->use] = 0;
2219
2220
0
    return (written? written : ret);
2221
0
}
2222
2223
#ifdef LIBXML_OUTPUT_ENABLED
2224
/**
2225
 * xmlCharEncOutput:
2226
 * @output: a parser output buffer
2227
 * @init: is this an initialization call without data
2228
 *
2229
 * Generic front-end for the encoding handler on parser output
2230
 * a first call with @init == 1 has to be made first to initiate the
2231
 * output in case of non-stateless encoding needing to initiate their
2232
 * state or the output (like the BOM in UTF16).
2233
 * In case of UTF8 sequence conversion errors for the given encoder,
2234
 * the content will be automatically remapped to a CharRef sequence.
2235
 *
2236
 * Returns the number of bytes written or an XML_ENC_ERR code.
2237
 */
2238
int
2239
xmlCharEncOutput(xmlOutputBufferPtr output, int init)
2240
0
{
2241
0
    int ret;
2242
0
    size_t written;
2243
0
    int writtentot = 0;
2244
0
    size_t toconv;
2245
0
    int c_in;
2246
0
    int c_out;
2247
0
    xmlBufPtr in;
2248
0
    xmlBufPtr out;
2249
2250
0
    if ((output == NULL) || (output->encoder == NULL) ||
2251
0
        (output->buffer == NULL) || (output->conv == NULL))
2252
0
        return(XML_ENC_ERR_INTERNAL);
2253
0
    out = output->conv;
2254
0
    in = output->buffer;
2255
2256
0
retry:
2257
2258
0
    written = xmlBufAvail(out);
2259
2260
    /*
2261
     * First specific handling of the initialization call
2262
     */
2263
0
    if (init) {
2264
0
        c_in = 0;
2265
0
        c_out = written;
2266
        /* TODO: Check return value. */
2267
0
        xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2268
0
                          NULL, &c_in);
2269
0
        xmlBufAddLen(out, c_out);
2270
0
        return(c_out);
2271
0
    }
2272
2273
    /*
2274
     * Conversion itself.
2275
     */
2276
0
    toconv = xmlBufUse(in);
2277
0
    if (toconv > 64 * 1024)
2278
0
        toconv = 64 * 1024;
2279
0
    if (toconv * 4 >= written) {
2280
0
        xmlBufGrow(out, toconv * 4);
2281
0
        written = xmlBufAvail(out);
2282
0
    }
2283
0
    if (written > 256 * 1024)
2284
0
        written = 256 * 1024;
2285
2286
0
    c_in = toconv;
2287
0
    c_out = written;
2288
0
    ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2289
0
                            xmlBufContent(in), &c_in);
2290
0
    xmlBufShrink(in, c_in);
2291
0
    xmlBufAddLen(out, c_out);
2292
0
    writtentot += c_out;
2293
2294
0
    if (ret == XML_ENC_ERR_SPACE)
2295
0
        goto retry;
2296
2297
    /*
2298
     * Attempt to handle error cases
2299
     */
2300
0
    if (ret == XML_ENC_ERR_INPUT) {
2301
0
        xmlChar charref[20];
2302
0
        int len = xmlBufUse(in);
2303
0
        xmlChar *content = xmlBufContent(in);
2304
0
        int cur, charrefLen;
2305
2306
0
        cur = xmlGetUTF8Char(content, &len);
2307
0
        if (cur <= 0)
2308
0
            goto error;
2309
2310
        /*
2311
         * Removes the UTF8 sequence, and replace it by a charref
2312
         * and continue the transcoding phase, hoping the error
2313
         * did not mangle the encoder state.
2314
         */
2315
0
        charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2316
0
                         "&#%d;", cur);
2317
0
        xmlBufShrink(in, len);
2318
0
        xmlBufGrow(out, charrefLen * 4);
2319
0
        c_out = xmlBufAvail(out);
2320
0
        c_in = charrefLen;
2321
0
        ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2322
0
                                charref, &c_in);
2323
0
        if ((ret < 0) || (c_in != charrefLen)) {
2324
0
            ret = XML_ENC_ERR_INTERNAL;
2325
0
            goto error;
2326
0
        }
2327
2328
0
        xmlBufAddLen(out, c_out);
2329
0
        writtentot += c_out;
2330
0
        goto retry;
2331
0
    }
2332
2333
0
error:
2334
0
    if ((writtentot <= 0) && (ret != 0)) {
2335
0
        if (output->error == 0)
2336
0
            output->error = xmlEncConvertError(ret);
2337
0
        return(ret);
2338
0
    }
2339
2340
0
    return(writtentot);
2341
0
}
2342
#endif
2343
2344
/**
2345
 * xmlCharEncOutFunc:
2346
 * @handler:  char encoding transformation data structure
2347
 * @out:  an xmlBuffer for the output.
2348
 * @in:  an xmlBuffer for the input
2349
 *
2350
 * Generic front-end for the encoding handler output function
2351
 * a first call with @in == NULL has to be made firs to initiate the
2352
 * output in case of non-stateless encoding needing to initiate their
2353
 * state or the output (like the BOM in UTF16).
2354
 * In case of UTF8 sequence conversion errors for the given encoder,
2355
 * the content will be automatically remapped to a CharRef sequence.
2356
 *
2357
 * Returns the number of bytes written or an XML_ENC_ERR code.
2358
 */
2359
int
2360
xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2361
0
                  xmlBufferPtr in) {
2362
0
    int ret;
2363
0
    int written;
2364
0
    int writtentot = 0;
2365
0
    int toconv;
2366
2367
0
    if (handler == NULL) return(XML_ENC_ERR_INTERNAL);
2368
0
    if (out == NULL) return(XML_ENC_ERR_INTERNAL);
2369
2370
0
retry:
2371
2372
0
    written = out->size - out->use;
2373
2374
0
    if (written > 0)
2375
0
  written--; /* Gennady: count '/0' */
2376
2377
    /*
2378
     * First specific handling of in = NULL, i.e. the initialization call
2379
     */
2380
0
    if (in == NULL) {
2381
0
        toconv = 0;
2382
        /* TODO: Check return value. */
2383
0
        xmlEncOutputChunk(handler, &out->content[out->use], &written,
2384
0
                          NULL, &toconv);
2385
0
        out->use += written;
2386
0
        out->content[out->use] = 0;
2387
0
        return(0);
2388
0
    }
2389
2390
    /*
2391
     * Conversion itself.
2392
     */
2393
0
    toconv = in->use;
2394
0
    if (toconv * 4 >= written) {
2395
0
        xmlBufferGrow(out, toconv * 4);
2396
0
  written = out->size - out->use - 1;
2397
0
    }
2398
0
    ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2399
0
                            in->content, &toconv);
2400
0
    xmlBufferShrink(in, toconv);
2401
0
    out->use += written;
2402
0
    writtentot += written;
2403
0
    out->content[out->use] = 0;
2404
2405
0
    if (ret == XML_ENC_ERR_SPACE)
2406
0
        goto retry;
2407
2408
    /*
2409
     * Attempt to handle error cases
2410
     */
2411
0
    if (ret == XML_ENC_ERR_INPUT) {
2412
0
        xmlChar charref[20];
2413
0
        int len = in->use;
2414
0
        const xmlChar *utf = (const xmlChar *) in->content;
2415
0
        int cur, charrefLen;
2416
2417
0
        cur = xmlGetUTF8Char(utf, &len);
2418
0
        if (cur <= 0)
2419
0
            return(ret);
2420
2421
        /*
2422
         * Removes the UTF8 sequence, and replace it by a charref
2423
         * and continue the transcoding phase, hoping the error
2424
         * did not mangle the encoder state.
2425
         */
2426
0
        charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2427
0
                         "&#%d;", cur);
2428
0
        xmlBufferShrink(in, len);
2429
0
        xmlBufferGrow(out, charrefLen * 4);
2430
0
        written = out->size - out->use - 1;
2431
0
        toconv = charrefLen;
2432
0
        ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2433
0
                                charref, &toconv);
2434
0
        if ((ret < 0) || (toconv != charrefLen))
2435
0
            return(XML_ENC_ERR_INTERNAL);
2436
2437
0
        out->use += written;
2438
0
        writtentot += written;
2439
0
        out->content[out->use] = 0;
2440
0
        goto retry;
2441
0
    }
2442
0
    return(writtentot ? writtentot : ret);
2443
0
}
2444
2445
/**
2446
 * xmlCharEncCloseFunc:
2447
 * @handler:  char encoding transformation data structure
2448
 *
2449
 * Generic front-end for encoding handler close function
2450
 *
2451
 * Returns 0 if success, or -1 in case of error
2452
 */
2453
int
2454
0
xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2455
0
    int ret = 0;
2456
0
    int tofree = 0;
2457
0
    int i = 0;
2458
2459
0
    if (handler == NULL) return(-1);
2460
2461
0
    for (i = 0; i < (int) NUM_DEFAULT_HANDLERS; i++) {
2462
0
        if (handler == &defaultHandlers[i])
2463
0
            return(0);
2464
0
    }
2465
2466
0
    if (handlers != NULL) {
2467
0
        for (i = 0;i < nbCharEncodingHandler; i++) {
2468
0
            if (handler == handlers[i])
2469
0
                return(0);
2470
0
  }
2471
0
    }
2472
0
#ifdef LIBXML_ICONV_ENABLED
2473
    /*
2474
     * Iconv handlers can be used only once, free the whole block.
2475
     * and the associated icon resources.
2476
     */
2477
0
    if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
2478
0
        tofree = 1;
2479
0
  if (handler->iconv_out != NULL) {
2480
0
      if (iconv_close(handler->iconv_out))
2481
0
    ret = -1;
2482
0
      handler->iconv_out = NULL;
2483
0
  }
2484
0
  if (handler->iconv_in != NULL) {
2485
0
      if (iconv_close(handler->iconv_in))
2486
0
    ret = -1;
2487
0
      handler->iconv_in = NULL;
2488
0
  }
2489
0
    }
2490
0
#endif /* LIBXML_ICONV_ENABLED */
2491
#ifdef LIBXML_ICU_ENABLED
2492
    if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) {
2493
        tofree = 1;
2494
  if (handler->uconv_out != NULL) {
2495
      closeIcuConverter(handler->uconv_out);
2496
      handler->uconv_out = NULL;
2497
  }
2498
  if (handler->uconv_in != NULL) {
2499
      closeIcuConverter(handler->uconv_in);
2500
      handler->uconv_in = NULL;
2501
  }
2502
    }
2503
#endif
2504
0
    if (tofree) {
2505
        /* free up only dynamic handlers iconv/uconv */
2506
0
        if (handler->name != NULL)
2507
0
            xmlFree(handler->name);
2508
0
        handler->name = NULL;
2509
0
        xmlFree(handler);
2510
0
    }
2511
2512
0
    return(ret);
2513
0
}
2514
2515
/**
2516
 * xmlByteConsumed:
2517
 * @ctxt: an XML parser context
2518
 *
2519
 * This function provides the current index of the parser relative
2520
 * to the start of the current entity. This function is computed in
2521
 * bytes from the beginning starting at zero and finishing at the
2522
 * size in byte of the file if parsing a file. The function is
2523
 * of constant cost if the input is UTF-8 but can be costly if run
2524
 * on non-UTF-8 input.
2525
 *
2526
 * Returns the index in bytes from the beginning of the entity or -1
2527
 *         in case the index could not be computed.
2528
 */
2529
long
2530
0
xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2531
0
    xmlParserInputPtr in;
2532
2533
0
    if (ctxt == NULL) return(-1);
2534
0
    in = ctxt->input;
2535
0
    if (in == NULL)  return(-1);
2536
0
    if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2537
0
        unsigned int unused = 0;
2538
0
  xmlCharEncodingHandler * handler = in->buf->encoder;
2539
        /*
2540
   * Encoding conversion, compute the number of unused original
2541
   * bytes from the input not consumed and subtract that from
2542
   * the raw consumed value, this is not a cheap operation
2543
   */
2544
0
        if (in->end - in->cur > 0) {
2545
0
      unsigned char convbuf[32000];
2546
0
      const unsigned char *cur = (const unsigned char *)in->cur;
2547
0
      int toconv = in->end - in->cur, written = 32000;
2548
2549
0
      int ret;
2550
2551
0
            do {
2552
0
                toconv = in->end - cur;
2553
0
                written = 32000;
2554
0
                ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2555
0
                                        cur, &toconv);
2556
0
                if ((ret != XML_ENC_ERR_SUCCESS) && (ret != XML_ENC_ERR_SPACE))
2557
0
                    return(-1);
2558
0
                unused += written;
2559
0
                cur += toconv;
2560
0
            } while (ret == XML_ENC_ERR_SPACE);
2561
0
  }
2562
0
  if (in->buf->rawconsumed < unused)
2563
0
      return(-1);
2564
0
  return(in->buf->rawconsumed - unused);
2565
0
    }
2566
0
    return(in->consumed + (in->cur - in->base));
2567
0
}
2568
2569
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2570
#ifdef LIBXML_ISO8859X_ENABLED
2571
2572
/**
2573
 * UTF8ToISO8859x:
2574
 * @out:  a pointer to an array of bytes to store the result
2575
 * @outlen:  the length of @out
2576
 * @in:  a pointer to an array of UTF-8 chars
2577
 * @inlen:  the length of @in
2578
 * @xlattable: the 2-level transcoding table
2579
 *
2580
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
2581
 * block of chars out.
2582
 *
2583
 * Returns the number of bytes written or an XML_ENC_ERR code.
2584
 *
2585
 * The value of @inlen after return is the number of octets consumed
2586
 * as the return value is positive, else unpredictable.
2587
 * The value of @outlen after return is the number of octets consumed.
2588
 */
2589
static int
2590
UTF8ToISO8859x(unsigned char* out, int *outlen,
2591
              const unsigned char* in, int *inlen,
2592
              const unsigned char* const xlattable) {
2593
    const unsigned char* outstart = out;
2594
    const unsigned char* inend;
2595
    const unsigned char* instart = in;
2596
    const unsigned char* processed = in;
2597
2598
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
2599
        (xlattable == NULL))
2600
  return(XML_ENC_ERR_INTERNAL);
2601
    if (in == NULL) {
2602
        /*
2603
        * initialization nothing to do
2604
        */
2605
        *outlen = 0;
2606
        *inlen = 0;
2607
        return(0);
2608
    }
2609
    inend = in + (*inlen);
2610
    while (in < inend) {
2611
        unsigned char d = *in++;
2612
        if  (d < 0x80)  {
2613
            *out++ = d;
2614
        } else if (d < 0xC0) {
2615
            /* trailing byte in leading position */
2616
            *outlen = out - outstart;
2617
            *inlen = processed - instart;
2618
            return(XML_ENC_ERR_INPUT);
2619
        } else if (d < 0xE0) {
2620
            unsigned char c;
2621
            if (!(in < inend)) {
2622
                /* trailing byte not in input buffer */
2623
                *outlen = out - outstart;
2624
                *inlen = processed - instart;
2625
                return(XML_ENC_ERR_PARTIAL);
2626
            }
2627
            c = *in++;
2628
            if ((c & 0xC0) != 0x80) {
2629
                /* not a trailing byte */
2630
                *outlen = out - outstart;
2631
                *inlen = processed - instart;
2632
                return(XML_ENC_ERR_INPUT);
2633
            }
2634
            c = c & 0x3F;
2635
            d = d & 0x1F;
2636
            d = xlattable [48 + c + xlattable [d] * 64];
2637
            if (d == 0) {
2638
                /* not in character set */
2639
                *outlen = out - outstart;
2640
                *inlen = processed - instart;
2641
                return(XML_ENC_ERR_INPUT);
2642
            }
2643
            *out++ = d;
2644
        } else if (d < 0xF0) {
2645
            unsigned char c1;
2646
            unsigned char c2;
2647
            if (!(in < inend - 1)) {
2648
                /* trailing bytes not in input buffer */
2649
                *outlen = out - outstart;
2650
                *inlen = processed - instart;
2651
                return(XML_ENC_ERR_PARTIAL);
2652
            }
2653
            c1 = *in++;
2654
            if ((c1 & 0xC0) != 0x80) {
2655
                /* not a trailing byte (c1) */
2656
                *outlen = out - outstart;
2657
                *inlen = processed - instart;
2658
                return(XML_ENC_ERR_INPUT);
2659
            }
2660
            c2 = *in++;
2661
            if ((c2 & 0xC0) != 0x80) {
2662
                /* not a trailing byte (c2) */
2663
                *outlen = out - outstart;
2664
                *inlen = processed - instart;
2665
                return(XML_ENC_ERR_INPUT);
2666
            }
2667
            c1 = c1 & 0x3F;
2668
            c2 = c2 & 0x3F;
2669
      d = d & 0x0F;
2670
      d = xlattable [48 + c2 + xlattable [48 + c1 +
2671
      xlattable [32 + d] * 64] * 64];
2672
            if (d == 0) {
2673
                /* not in character set */
2674
                *outlen = out - outstart;
2675
                *inlen = processed - instart;
2676
                return(XML_ENC_ERR_INPUT);
2677
            }
2678
            *out++ = d;
2679
        } else {
2680
            /* cannot transcode >= U+010000 */
2681
            *outlen = out - outstart;
2682
            *inlen = processed - instart;
2683
            return(XML_ENC_ERR_INPUT);
2684
        }
2685
        processed = in;
2686
    }
2687
    *outlen = out - outstart;
2688
    *inlen = processed - instart;
2689
    return(*outlen);
2690
}
2691
2692
/**
2693
 * ISO8859xToUTF8
2694
 * @out:  a pointer to an array of bytes to store the result
2695
 * @outlen:  the length of @out
2696
 * @in:  a pointer to an array of ISO Latin 1 chars
2697
 * @inlen:  the length of @in
2698
 *
2699
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
2700
 * block of chars out.
2701
 *
2702
 * Returns the number of bytes written or an XML_ENC_ERR code.
2703
 *
2704
 * The value of @inlen after return is the number of octets consumed
2705
 * The value of @outlen after return is the number of octets produced.
2706
 */
2707
static int
2708
ISO8859xToUTF8(unsigned char* out, int *outlen,
2709
              const unsigned char* in, int *inlen,
2710
              unsigned short const *unicodetable) {
2711
    unsigned char* outstart = out;
2712
    unsigned char* outend;
2713
    const unsigned char* instart = in;
2714
    const unsigned char* inend;
2715
    const unsigned char* instop;
2716
    unsigned int c;
2717
2718
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
2719
        (in == NULL) || (unicodetable == NULL))
2720
  return(XML_ENC_ERR_INTERNAL);
2721
    outend = out + *outlen;
2722
    inend = in + *inlen;
2723
    instop = inend;
2724
2725
    while ((in < inend) && (out < outend - 2)) {
2726
        if (*in >= 0x80) {
2727
            c = unicodetable [*in - 0x80];
2728
            if (c == 0) {
2729
                /* undefined code point */
2730
                *outlen = out - outstart;
2731
                *inlen = in - instart;
2732
                return(XML_ENC_ERR_INPUT);
2733
            }
2734
            if (c < 0x800) {
2735
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
2736
                *out++ = (c & 0x3F) | 0x80;
2737
            } else {
2738
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
2739
                *out++ = ((c >>  6) & 0x3F) | 0x80;
2740
                *out++ = (c & 0x3F) | 0x80;
2741
            }
2742
            ++in;
2743
        }
2744
        if (instop - in > outend - out) instop = in + (outend - out);
2745
        while ((*in < 0x80) && (in < instop)) {
2746
            *out++ = *in++;
2747
        }
2748
    }
2749
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
2750
        *out++ =  *in++;
2751
    }
2752
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
2753
        *out++ =  *in++;
2754
    }
2755
    *outlen = out - outstart;
2756
    *inlen = in - instart;
2757
    return (*outlen);
2758
}
2759
2760
2761
/************************************************************************
2762
 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
2763
 ************************************************************************/
2764
2765
/*
 * ISO-8859-2 upper half: 128 Unicode code points, one per byte value
 * 0x80..0xFF, stored in byte order (slot 0 is byte 0x80).
 * The ISO-8859-x to UTF-8 converter in this file indexes its
 * `unicodetable` argument with (byte - 0x80) and treats a 0 entry as an
 * undefined code point; every slot of this table is non-zero.
 * NOTE(review): presumably this table is what gets passed as that
 * argument for ISO-8859-2 -- the call site is not visible here.
 */
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
2766
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2767
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2768
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2769
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2770
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
2771
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
2772
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
2773
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
2774
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
2775
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
2776
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
2777
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
2778
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
2779
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
2780
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
2781
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
2782
};
2783
2784
/*
 * Packed byte table for ISO-8859-2 transcoding, declared as 48 leading
 * bytes plus 6 blocks of 64 bytes (432 bytes in total).
 * The string literal supplies exactly 432 bytes (27 rows of 16), so the
 * array is completely filled and no terminating NUL is stored.
 * NOTE(review): the code that reads this table is outside this excerpt;
 * presumably it serves the reverse (UTF-8 -> ISO-8859-2) direction, with
 * the leading 48 bytes selecting a 64-byte block and a 0 entry meaning
 * "no mapping" -- confirm against the reader.
 */
static const unsigned char xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
2785
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
2786
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2787
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2788
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2789
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2790
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2791
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2792
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2793
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2794
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
2795
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
2796
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
2797
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
2798
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2799
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
2800
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
2801
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
2802
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2803
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2804
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
2805
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
2806
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
2807
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
2808
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
2809
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
2810
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
2811
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
2812
};
2813
2814
/*
 * ISO-8859-3 upper half: 128 Unicode code points, one per byte value
 * 0x80..0xFF, stored in byte order (slot 0 is byte 0x80).
 * The ISO-8859-x to UTF-8 converter in this file indexes its
 * `unicodetable` argument with (byte - 0x80) and treats a 0 entry as an
 * undefined code point. This table contains such 0x0000 slots (for
 * example bytes 0xa5, 0xae, 0xbe, 0xc3, 0xd0, 0xe3 and 0xf0).
 * NOTE(review): presumably this table is what gets passed as that
 * argument for ISO-8859-3 -- the call site is not visible here.
 */
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
2815
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2816
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2817
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2818
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2819
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
2820
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
2821
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
2822
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
2823
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
2824
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
2825
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
2826
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
2827
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
2828
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
2829
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
2830
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
2831
};
2832
2833
/*
 * Packed byte table for ISO-8859-3 transcoding, declared as 48 leading
 * bytes plus 7 blocks of 64 bytes (496 bytes in total).
 * The string literal supplies exactly 496 bytes (31 rows of 16), so the
 * array is completely filled and no terminating NUL is stored.
 * NOTE(review): the code that reads this table is outside this excerpt;
 * presumably it serves the reverse (UTF-8 -> ISO-8859-3) direction, with
 * the leading 48 bytes selecting a 64-byte block and a 0 entry meaning
 * "no mapping" -- confirm against the reader.
 */
static const unsigned char xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
2834
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
2835
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2836
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2837
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2838
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2839
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2840
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2841
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2842
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2843
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
2844
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
2845
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
2846
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
2847
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
2848
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2849
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2850
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
2851
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2852
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2853
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2854
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2855
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2856
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2857
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2858
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
2859
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
2860
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
2861
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2862
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
2863
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2864
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
2865
};
2866
2867
/*
 * ISO-8859-4 upper half: 128 Unicode code points, one per byte value
 * 0x80..0xFF, stored in byte order (slot 0 is byte 0x80).
 * The ISO-8859-x to UTF-8 converter in this file indexes its
 * `unicodetable` argument with (byte - 0x80) and treats a 0 entry as an
 * undefined code point; every slot of this table is non-zero.
 * NOTE(review): presumably this table is what gets passed as that
 * argument for ISO-8859-4 -- the call site is not visible here.
 */
static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
2868
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2869
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2870
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2871
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2872
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
2873
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
2874
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
2875
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
2876
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
2877
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
2878
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
2879
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
2880
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
2881
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
2882
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
2883
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
2884
};
2885
2886
/*
 * Packed byte table for ISO-8859-4 transcoding, declared as 48 leading
 * bytes plus 6 blocks of 64 bytes (432 bytes in total).
 * The string literal supplies exactly 432 bytes (27 rows of 16), so the
 * array is completely filled and no terminating NUL is stored.
 * NOTE(review): the code that reads this table is outside this excerpt;
 * presumably it serves the reverse (UTF-8 -> ISO-8859-4) direction, with
 * the leading 48 bytes selecting a 64-byte block and a 0 entry meaning
 * "no mapping" -- confirm against the reader.
 */
static const unsigned char xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
2887
    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
2888
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2889
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2890
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2891
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2892
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2893
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2894
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2895
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2896
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
2897
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
2898
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
2899
    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
2900
    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
2901
    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
2902
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
2903
    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
2904
    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
2905
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
2906
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
2907
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
2908
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2909
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2910
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
2911
    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
2912
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
2913
    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
2914
};
2915
2916
/*
 * ISO-8859-5 upper half: 128 Unicode code points, one per byte value
 * 0x80..0xFF, stored in byte order (slot 0 is byte 0x80).
 * The ISO-8859-x to UTF-8 converter in this file indexes its
 * `unicodetable` argument with (byte - 0x80) and treats a 0 entry as an
 * undefined code point; every slot of this table is non-zero.
 * Byte 0xf0 maps to 0x2116, the only value here at or above 0x800.
 * NOTE(review): presumably this table is what gets passed as that
 * argument for ISO-8859-5 -- the call site is not visible here.
 */
static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
2917
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2918
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2919
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2920
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2921
    0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
2922
    0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
2923
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
2924
    0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
2925
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
2926
    0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
2927
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
2928
    0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
2929
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
2930
    0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
2931
    0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
2932
    0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
2933
};
2934
2935
/*
 * Packed byte table for ISO-8859-5 transcoding, declared as 48 leading
 * bytes plus 6 blocks of 64 bytes (432 bytes in total).
 * The string literal supplies exactly 432 bytes (27 rows of 16), so the
 * array is completely filled and no terminating NUL is stored.
 * NOTE(review): the code that reads this table is outside this excerpt;
 * presumably it serves the reverse (UTF-8 -> ISO-8859-5) direction, with
 * the leading 48 bytes selecting a 64-byte block and a 0 entry meaning
 * "no mapping" -- confirm against the reader.
 */
static const unsigned char xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
2936
    "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2937
    "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2938
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2939
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2940
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2941
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2942
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2943
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2944
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2945
    "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
2946
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2947
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
2948
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
2949
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2950
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
2951
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2952
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
2953
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2954
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2955
    "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2956
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2957
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2958
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2959
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2960
    "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2961
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2962
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2963
};
2964
2965
/*
 * ISO-8859-6 upper half: 128 Unicode code points, one per byte value
 * 0x80..0xFF, stored in byte order (slot 0 is byte 0x80).
 * The ISO-8859-x to UTF-8 converter in this file indexes its
 * `unicodetable` argument with (byte - 0x80) and treats a 0 entry as an
 * undefined code point. Many slots of this table are 0x0000, including
 * most of 0xa1..0xbf and everything from 0xf3 upwards.
 * NOTE(review): presumably this table is what gets passed as that
 * argument for ISO-8859-6 -- the call site is not visible here.
 */
static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
2966
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2967
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2968
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2969
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2970
    0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
2971
    0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
2972
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2973
    0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
2974
    0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
2975
    0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
2976
    0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
2977
    0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2978
    0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
2979
    0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
2980
    0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2981
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2982
};
2983
2984
/*
 * Packed byte table for ISO-8859-6 transcoding, declared as 48 leading
 * bytes plus 5 blocks of 64 bytes (368 bytes in total).
 * The string literal supplies exactly 368 bytes (23 rows of 16), so the
 * array is completely filled and no terminating NUL is stored.
 * NOTE(review): the code that reads this table is outside this excerpt;
 * presumably it serves the reverse (UTF-8 -> ISO-8859-6) direction, with
 * the leading 48 bytes selecting a 64-byte block and a 0 entry meaning
 * "no mapping" -- confirm against the reader.
 */
static const unsigned char xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
2985
    "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2986
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
2987
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2988
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2989
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2990
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2991
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2992
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2993
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2994
    "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
2995
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2996
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2997
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2998
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2999
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3000
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3001
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3002
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3003
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3004
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3005
    "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3006
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3007
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3008
};
3009
3010
/*
 * ISO-8859-7 upper half: 128 Unicode code points, one per byte value
 * 0x80..0xFF, stored in byte order (slot 0 is byte 0x80).
 * The ISO-8859-x to UTF-8 converter in this file indexes its
 * `unicodetable` argument with (byte - 0x80) and treats a 0 entry as an
 * undefined code point. This table contains such 0x0000 slots (bytes
 * 0xa4, 0xa5, 0xaa, 0xae, 0xd2 and 0xff).
 * NOTE(review): presumably this table is what gets passed as that
 * argument for ISO-8859-7 -- the call site is not visible here.
 */
static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
3011
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3012
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3013
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3014
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3015
    0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3016
    0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3017
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3018
    0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3019
    0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3020
    0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3021
    0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3022
    0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3023
    0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3024
    0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3025
    0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3026
    0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3027
};
3028
3029
/*
 * Packed byte table for ISO-8859-7 transcoding, declared as 48 leading
 * bytes plus 7 blocks of 64 bytes (496 bytes in total).
 * The string literal supplies exactly 496 bytes (31 rows of 16), so the
 * array is completely filled and no terminating NUL is stored.
 * NOTE(review): the code that reads this table is outside this excerpt;
 * presumably it serves the reverse (UTF-8 -> ISO-8859-7) direction, with
 * the leading 48 bytes selecting a 64-byte block and a 0 entry meaning
 * "no mapping" -- confirm against the reader.
 */
static const unsigned char xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3030
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3031
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3032
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3033
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3034
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3035
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3036
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3037
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3038
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3039
    "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3040
    "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3041
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3042
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3043
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3044
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3045
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3046
    "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3047
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3048
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3049
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3050
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3051
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3052
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3053
    "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3054
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3055
    "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3056
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3057
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3058
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3059
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3060
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3061
};
3062
3063
/*
 * ISO-8859-8 upper half: 128 Unicode code points, one per byte value
 * 0x80..0xFF, stored in byte order (slot 0 is byte 0x80).
 * The ISO-8859-x to UTF-8 converter in this file indexes its
 * `unicodetable` argument with (byte - 0x80) and treats a 0 entry as an
 * undefined code point. Many slots of this table are 0x0000, including
 * the whole 0xbf..0xde run.
 * NOTE(review): presumably this table is what gets passed as that
 * argument for ISO-8859-8 -- the call site is not visible here.
 */
static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
3064
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3065
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3066
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3067
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3068
    0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3069
    0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3070
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3071
    0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3072
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3073
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3074
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3075
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3076
    0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3077
    0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3078
    0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3079
    0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3080
};
3081
3082
/*
 * Packed byte table for ISO-8859-8 transcoding, declared as 48 leading
 * bytes plus 7 blocks of 64 bytes (496 bytes in total).
 * The string literal supplies exactly 496 bytes (31 rows of 16), so the
 * array is completely filled and no terminating NUL is stored.
 * NOTE(review): the code that reads this table is outside this excerpt;
 * presumably it serves the reverse (UTF-8 -> ISO-8859-8) direction, with
 * the leading 48 bytes selecting a 64-byte block and a 0 entry meaning
 * "no mapping" -- confirm against the reader.
 */
static const unsigned char xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3083
    "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3084
    "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3085
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3086
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3087
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3088
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3089
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3090
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3091
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3092
    "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3093
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3094
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3095
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3096
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3097
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3098
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3099
    "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3100
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3101
    "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3102
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3103
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3104
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3105
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3106
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3107
    "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3108
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3109
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3110
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3111
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3112
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3113
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3114
};
3115
3116
/*
 * ISO-8859-9 upper half: 128 Unicode code points, one per byte value
 * 0x80..0xFF, stored in byte order (slot 0 is byte 0x80).
 * The ISO-8859-x to UTF-8 converter in this file indexes its
 * `unicodetable` argument with (byte - 0x80) and treats a 0 entry as an
 * undefined code point; every slot of this table is non-zero.
 * All but six entries equal their own byte value; the exceptions are
 * bytes 0xd0, 0xdd, 0xde, 0xf0, 0xfd and 0xfe.
 * NOTE(review): presumably this table is what gets passed as that
 * argument for ISO-8859-9 -- the call site is not visible here.
 */
static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
3117
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3118
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3119
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3120
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3121
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3122
    0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3123
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3124
    0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3125
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3126
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3127
    0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3128
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3129
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3130
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3131
    0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3132
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3133
};
3134
3135
/*
 * Packed byte table for ISO-8859-9 transcoding, declared as 48 leading
 * bytes plus 5 blocks of 64 bytes (368 bytes in total).
 * The string literal supplies exactly 368 bytes (23 rows of 16), so the
 * array is completely filled and no terminating NUL is stored.
 * NOTE(review): the code that reads this table is outside this excerpt;
 * presumably it serves the reverse (UTF-8 -> ISO-8859-9) direction, with
 * the leading 48 bytes selecting a 64-byte block and a 0 entry meaning
 * "no mapping" -- confirm against the reader.
 */
static const unsigned char xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3136
    "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3137
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3138
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3139
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3140
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3141
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3142
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3143
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3144
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3145
    "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3146
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3147
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3148
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3149
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3150
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3151
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3152
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3153
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3154
    "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3155
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3156
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3157
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3158
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3159
};
3160
3161
/*
 * ISO-8859-10 upper half: 128 Unicode code points, one per byte value
 * 0x80..0xFF, stored in byte order (slot 0 is byte 0x80).
 * The ISO-8859-x to UTF-8 converter in this file indexes its
 * `unicodetable` argument with (byte - 0x80) and treats a 0 entry as an
 * undefined code point; every slot of this table is non-zero.
 * Byte 0xbd maps to 0x2015, the only value here at or above 0x800.
 * NOTE(review): presumably this table is what gets passed as that
 * argument for ISO-8859-10 -- the call site is not visible here.
 */
static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3162
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3163
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3164
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3165
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3166
    0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3167
    0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3168
    0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3169
    0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3170
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3171
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3172
    0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3173
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3174
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3175
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3176
    0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3177
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3178
};
3179
3180
/*
 * Packed byte table for ISO-8859-10 transcoding, declared as 48 leading
 * bytes plus 7 blocks of 64 bytes (496 bytes in total).
 * The string literal supplies exactly 496 bytes (31 rows of 16), so the
 * array is completely filled and no terminating NUL is stored.
 * NOTE(review): the code that reads this table is outside this excerpt;
 * presumably it serves the reverse (UTF-8 -> ISO-8859-10) direction, with
 * the leading 48 bytes selecting a 64-byte block and a 0 entry meaning
 * "no mapping" -- confirm against the reader.
 */
static const unsigned char xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3181
    "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3182
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3183
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3184
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3185
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3186
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3187
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3188
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3189
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3190
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3191
    "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3192
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3193
    "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3194
    "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3195
    "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3196
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3197
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3198
    "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3199
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3200
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3201
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3202
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3203
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3204
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3205
    "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3206
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3207
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3208
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3209
    "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3210
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3211
    "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3212
};
3213
3214
/*
 * ISO-8859-11 upper half: 128 Unicode code points, one per byte value
 * 0x80..0xFF, stored in byte order (slot 0 is byte 0x80).
 * The ISO-8859-x to UTF-8 converter in this file indexes its
 * `unicodetable` argument with (byte - 0x80) and treats a 0 entry as an
 * undefined code point. This table contains such 0x0000 slots (bytes
 * 0xdb..0xde and 0xfc..0xff).
 * NOTE(review): presumably this table is what gets passed as that
 * argument for ISO-8859-11 -- the call site is not visible here.
 */
static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3215
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3216
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3217
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3218
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3219
    0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3220
    0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3221
    0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3222
    0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3223
    0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3224
    0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3225
    0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3226
    0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3227
    0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3228
    0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3229
    0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3230
    0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3231
};
3232
3233
/*
 * xmltranscodetable_ISO8859_11:
 *
 * Byte table that UTF8ToISO8859_11 hands to UTF8ToISO8859x.
 * The initializer is 27 rows of 16 bytes, i.e. exactly 48 + 6 * 64
 * bytes, so the string literal's terminating NUL is not stored.
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * six 64-byte blocks -- confirm against UTF8ToISO8859x.
 */
static const unsigned char xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3234
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3235
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3236
    "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3237
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3238
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3239
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3240
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3241
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3242
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3243
    "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3244
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3245
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3246
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3247
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3248
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3249
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3250
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3251
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3252
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3253
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3254
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3255
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3256
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3257
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3258
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3259
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3260
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3261
};
3262
3263
/*
 * xmlunicodetable_ISO8859_13:
 *
 * 128-entry lookup table that ISO8859_13ToUTF8 hands to ISO8859xToUTF8.
 * The first 32 entries are the identity values 0x0080..0x009f.
 * NOTE(review): presumably entry i is the Unicode code point for
 * ISO-8859-13 byte 0x80 + i -- confirm against ISO8859xToUTF8.
 */
static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3264
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3265
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3266
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3267
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3268
    0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3269
    0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3270
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3271
    0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3272
    0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3273
    0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3274
    0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3275
    0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3276
    0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3277
    0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3278
    0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3279
    0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3280
};
3281
3282
/*
 * xmltranscodetable_ISO8859_13:
 *
 * Byte table that UTF8ToISO8859_13 hands to UTF8ToISO8859x.
 * The initializer is 31 rows of 16 bytes, i.e. exactly 48 + 7 * 64
 * bytes, so the string literal's terminating NUL is not stored.
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * seven 64-byte blocks -- confirm against UTF8ToISO8859x.
 */
static const unsigned char xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3283
    "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3284
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3285
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3286
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3287
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3288
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3289
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3290
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3291
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3292
    "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3293
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3294
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3295
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3296
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3297
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3298
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3299
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3300
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3301
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3302
    "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3303
    "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3304
    "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3305
    "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3306
    "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3307
    "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3308
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3309
    "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3310
    "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3311
    "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3312
    "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3313
    "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3314
};
3315
3316
/*
 * xmlunicodetable_ISO8859_14:
 *
 * 128-entry lookup table that ISO8859_14ToUTF8 hands to ISO8859xToUTF8.
 * The first 32 entries are the identity values 0x0080..0x009f.
 * NOTE(review): presumably entry i is the Unicode code point for
 * ISO-8859-14 byte 0x80 + i -- confirm against ISO8859xToUTF8.
 */
static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
3317
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3318
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3319
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3320
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3321
    0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3322
    0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3323
    0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3324
    0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3325
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3326
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3327
    0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3328
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3329
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3330
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3331
    0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3332
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
3333
};
3334
3335
/*
 * xmltranscodetable_ISO8859_14:
 *
 * Byte table that UTF8ToISO8859_14 hands to UTF8ToISO8859x.
 * The initializer is 43 rows of 16 bytes, i.e. exactly 48 + 10 * 64
 * bytes, so the string literal's terminating NUL is not stored.
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * ten 64-byte blocks -- confirm against UTF8ToISO8859x.
 */
static const unsigned char xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
3336
    "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3337
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3338
    "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3339
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3340
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3341
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3342
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3343
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3344
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3345
    "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3346
    "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3347
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3348
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3349
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3350
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3351
    "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3352
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3353
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3354
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3355
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3356
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3357
    "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3358
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3359
    "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3360
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3361
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3362
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3363
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3364
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3365
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3366
    "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3367
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3368
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3369
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3370
    "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3371
    "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3372
    "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3373
    "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3374
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3375
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3376
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3377
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3378
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3379
};
3380
3381
/*
 * xmlunicodetable_ISO8859_15:
 *
 * 128-entry lookup table that ISO8859_15ToUTF8 hands to ISO8859xToUTF8.
 * Every entry equals 0x0080 + its index except eight positions
 * (0x20ac, 0x0160, 0x0161, 0x017d, 0x017e, 0x0152, 0x0153, 0x0178).
 * NOTE(review): presumably entry i is the Unicode code point for
 * ISO-8859-15 byte 0x80 + i -- confirm against ISO8859xToUTF8.
 */
static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
3382
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3383
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3384
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3385
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3386
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3387
    0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3388
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3389
    0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3390
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3391
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3392
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3393
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3394
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3395
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3396
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3397
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
3398
};
3399
3400
/*
 * xmltranscodetable_ISO8859_15:
 *
 * Byte table that UTF8ToISO8859_15 hands to UTF8ToISO8859x.
 * The initializer is 27 rows of 16 bytes, i.e. exactly 48 + 6 * 64
 * bytes, so the string literal's terminating NUL is not stored.
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * six 64-byte blocks -- confirm against UTF8ToISO8859x.
 */
static const unsigned char xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
3401
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3402
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3403
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3404
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3405
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3406
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3407
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3408
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3409
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3410
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3411
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3412
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3413
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3414
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3415
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3416
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3417
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3418
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3419
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3420
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3421
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3422
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3423
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3424
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3425
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3426
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3427
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
3428
};
3429
3430
/*
 * xmlunicodetable_ISO8859_16:
 *
 * 128-entry lookup table that ISO8859_16ToUTF8 hands to ISO8859xToUTF8.
 * The first 32 entries are the identity values 0x0080..0x009f.
 * NOTE(review): presumably entry i is the Unicode code point for
 * ISO-8859-16 byte 0x80 + i -- confirm against ISO8859xToUTF8.
 */
static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
3431
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3432
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3433
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3434
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3435
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3436
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3437
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3438
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3439
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3440
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3441
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3442
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3443
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3444
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3445
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3446
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
3447
};
3448
3449
/*
 * xmltranscodetable_ISO8859_16:
 *
 * Byte table that UTF8ToISO8859_16 hands to UTF8ToISO8859x.
 * The initializer is 39 rows of 16 bytes, i.e. exactly 48 + 9 * 64
 * bytes, so the string literal's terminating NUL is not stored.
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * nine 64-byte blocks -- confirm against UTF8ToISO8859x.
 */
static const unsigned char xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
3450
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3451
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3452
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3453
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3454
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3455
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3456
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3457
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3458
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3459
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3460
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3461
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3462
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3463
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3464
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3465
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3466
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3467
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3468
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3469
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3470
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3471
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3472
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3473
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3474
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3475
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3476
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3477
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3478
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3479
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3480
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3481
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3482
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3483
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3484
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3485
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3486
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3487
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3488
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3489
};
3490
3491
3492
/*
3493
 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3494
 */
3495
3496
static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3497
    const unsigned char* in, int *inlen) {
3498
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3499
}
3500
static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3501
    const unsigned char* in, int *inlen) {
3502
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3503
}
3504
3505
static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3506
    const unsigned char* in, int *inlen) {
3507
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3508
}
3509
static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3510
    const unsigned char* in, int *inlen) {
3511
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3512
}
3513
3514
static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3515
    const unsigned char* in, int *inlen) {
3516
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3517
}
3518
static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3519
    const unsigned char* in, int *inlen) {
3520
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3521
}
3522
3523
static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3524
    const unsigned char* in, int *inlen) {
3525
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3526
}
3527
static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3528
    const unsigned char* in, int *inlen) {
3529
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3530
}
3531
3532
static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3533
    const unsigned char* in, int *inlen) {
3534
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3535
}
3536
static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3537
    const unsigned char* in, int *inlen) {
3538
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3539
}
3540
3541
static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3542
    const unsigned char* in, int *inlen) {
3543
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3544
}
3545
static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3546
    const unsigned char* in, int *inlen) {
3547
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3548
}
3549
3550
static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3551
    const unsigned char* in, int *inlen) {
3552
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3553
}
3554
static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3555
    const unsigned char* in, int *inlen) {
3556
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3557
}
3558
3559
static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3560
    const unsigned char* in, int *inlen) {
3561
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3562
}
3563
static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3564
    const unsigned char* in, int *inlen) {
3565
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3566
}
3567
3568
static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3569
    const unsigned char* in, int *inlen) {
3570
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3571
}
3572
static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3573
    const unsigned char* in, int *inlen) {
3574
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3575
}
3576
3577
static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3578
    const unsigned char* in, int *inlen) {
3579
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3580
}
3581
static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3582
    const unsigned char* in, int *inlen) {
3583
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3584
}
3585
3586
static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3587
    const unsigned char* in, int *inlen) {
3588
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3589
}
3590
static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3591
    const unsigned char* in, int *inlen) {
3592
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3593
}
3594
3595
static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3596
    const unsigned char* in, int *inlen) {
3597
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3598
}
3599
static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3600
    const unsigned char* in, int *inlen) {
3601
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3602
}
3603
3604
static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3605
    const unsigned char* in, int *inlen) {
3606
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3607
}
3608
static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3609
    const unsigned char* in, int *inlen) {
3610
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3611
}
3612
3613
static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3614
    const unsigned char* in, int *inlen) {
3615
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3616
}
3617
static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3618
    const unsigned char* in, int *inlen) {
3619
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3620
}
3621
3622
#endif
3623
#endif
3624