Coverage Report

Created: 2023-05-05 14:18

/src/libxml2/encoding.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * encoding.c : implements the encoding conversion functions needed for XML
3
 *
4
 * Related specs:
5
 * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6
 * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7
 * [ISO-10646]    UTF-8 and UTF-16 in Annexes
8
 * [ISO-8859-1]   ISO Latin-1 characters codes.
9
 * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
10
 *                Worldwide Character Encoding -- Version 1.0", Addison-
11
 *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
12
 *                described in Unicode Technical Report #4.
13
 * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
14
 *                Information Interchange, ANSI X3.4-1986.
15
 *
16
 * See Copyright for the status of this software.
17
 *
18
 * daniel@veillard.com
19
 *
20
 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
21
 */
22
23
#define IN_LIBXML
24
#include "libxml.h"
25
26
#include <string.h>
27
#include <limits.h>
28
#include <ctype.h>
29
#include <stdlib.h>
30
31
#ifdef LIBXML_ICONV_ENABLED
32
#include <errno.h>
33
#endif
34
35
#include <libxml/encoding.h>
36
#include <libxml/xmlmemory.h>
37
#ifdef LIBXML_HTML_ENABLED
38
#include <libxml/HTMLparser.h>
39
#endif
40
#include <libxml/globals.h>
41
#include <libxml/xmlerror.h>
42
43
#include "private/buf.h"
44
#include "private/enc.h"
45
#include "private/error.h"
46
47
#ifdef LIBXML_ICU_ENABLED
48
#include <unicode/ucnv.h>
49
/* Size of pivot buffer, same as icu/source/common/ucnv.cpp CHUNK_SIZE */
50
#define ICU_PIVOT_BUF_SIZE 1024
51
typedef struct _uconv_t uconv_t;
52
struct _uconv_t {
53
  UConverter *uconv; /* for conversion between an encoding and UTF-16 */
54
  UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */
55
  UChar      pivot_buf[ICU_PIVOT_BUF_SIZE];
56
  UChar      *pivot_source;
57
  UChar      *pivot_target;
58
};
59
#endif
60
61
typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
62
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
63
struct _xmlCharEncodingAlias {
64
    const char *name;
65
    const char *alias;
66
};
67
68
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
69
static int xmlCharEncodingAliasesNb = 0;
70
static int xmlCharEncodingAliasesMax = 0;
71
72
#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
73
#if 0
74
#define DEBUG_ENCODING  /* Define this to get encoding traces */
75
#endif
76
#else
77
#endif
78
79
static int xmlLittleEndian = 1;
80
81
/**
82
 * xmlEncodingErrMemory:
83
 * @extra:  extra information
84
 *
85
 * Handle an out of memory condition
86
 */
87
static void
88
xmlEncodingErrMemory(const char *extra)
89
0
{
90
0
    __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
91
0
}
92
93
/**
94
 * xmlErrEncoding:
95
 * @error:  the error number
96
 * @msg:  the error message
97
 *
98
 * n encoding error
99
 */
100
static void LIBXML_ATTR_FORMAT(2,0)
101
xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
102
1.47k
{
103
1.47k
    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
104
1.47k
                    XML_FROM_I18N, error, XML_ERR_FATAL,
105
1.47k
                    NULL, 0, val, NULL, NULL, 0, 0, msg, val);
106
1.47k
}
107
108
#ifdef LIBXML_ICU_ENABLED
109
static uconv_t*
110
openIcuConverter(const char* name, int toUnicode)
111
{
112
  UErrorCode status = U_ZERO_ERROR;
113
  uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
114
  if (conv == NULL)
115
    return NULL;
116
117
  conv->pivot_source = conv->pivot_buf;
118
  conv->pivot_target = conv->pivot_buf;
119
120
  conv->uconv = ucnv_open(name, &status);
121
  if (U_FAILURE(status))
122
    goto error;
123
124
  status = U_ZERO_ERROR;
125
  if (toUnicode) {
126
    ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
127
                        NULL, NULL, NULL, &status);
128
  }
129
  else {
130
    ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
131
                        NULL, NULL, NULL, &status);
132
  }
133
  if (U_FAILURE(status))
134
    goto error;
135
136
  status = U_ZERO_ERROR;
137
  conv->utf8 = ucnv_open("UTF-8", &status);
138
  if (U_SUCCESS(status))
139
    return conv;
140
141
error:
142
  if (conv->uconv)
143
    ucnv_close(conv->uconv);
144
  xmlFree(conv);
145
  return NULL;
146
}
147
148
static void
149
closeIcuConverter(uconv_t *conv)
150
{
151
  if (conv != NULL) {
152
    ucnv_close(conv->uconv);
153
    ucnv_close(conv->utf8);
154
    xmlFree(conv);
155
  }
156
}
157
#endif /* LIBXML_ICU_ENABLED */
158
159
/************************************************************************
160
 *                  *
161
 *    Conversions To/From UTF8 encoding     *
162
 *                  *
163
 ************************************************************************/
164
165
/**
166
 * asciiToUTF8:
167
 * @out:  a pointer to an array of bytes to store the result
168
 * @outlen:  the length of @out
169
 * @in:  a pointer to an array of ASCII chars
170
 * @inlen:  the length of @in
171
 *
172
 * Take a block of ASCII chars in and try to convert it to an UTF-8
173
 * block of chars out.
174
 * Returns 0 if success, or -1 otherwise
175
 * The value of @inlen after return is the number of octets consumed
176
 *     if the return value is positive, else unpredictable.
177
 * The value of @outlen after return is the number of octets produced.
178
 */
179
static int
180
asciiToUTF8(unsigned char* out, int *outlen,
181
42.4k
              const unsigned char* in, int *inlen) {
182
42.4k
    unsigned char* outstart = out;
183
42.4k
    const unsigned char* base = in;
184
42.4k
    const unsigned char* processed = in;
185
42.4k
    unsigned char* outend = out + *outlen;
186
42.4k
    const unsigned char* inend;
187
42.4k
    unsigned int c;
188
189
42.4k
    inend = in + (*inlen);
190
1.44M
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
191
1.44M
  c= *in++;
192
193
1.44M
        if (out >= outend)
194
0
      break;
195
1.44M
        if (c < 0x80) {
196
1.40M
      *out++ = c;
197
1.40M
  } else {
198
37.0k
      *outlen = out - outstart;
199
37.0k
      *inlen = processed - base;
200
37.0k
      return(-1);
201
37.0k
  }
202
203
1.40M
  processed = (const unsigned char*) in;
204
1.40M
    }
205
5.36k
    *outlen = out - outstart;
206
5.36k
    *inlen = processed - base;
207
5.36k
    return(*outlen);
208
42.4k
}
209
210
#ifdef LIBXML_OUTPUT_ENABLED
211
/**
212
 * UTF8Toascii:
213
 * @out:  a pointer to an array of bytes to store the result
214
 * @outlen:  the length of @out
215
 * @in:  a pointer to an array of UTF-8 chars
216
 * @inlen:  the length of @in
217
 *
218
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
219
 * block of chars out.
220
 *
221
 * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
222
 * The value of @inlen after return is the number of octets consumed
223
 *     if the return value is positive, else unpredictable.
224
 * The value of @outlen after return is the number of octets produced.
225
 */
226
static int
227
UTF8Toascii(unsigned char* out, int *outlen,
228
374
              const unsigned char* in, int *inlen) {
229
374
    const unsigned char* processed = in;
230
374
    const unsigned char* outend;
231
374
    const unsigned char* outstart = out;
232
374
    const unsigned char* instart = in;
233
374
    const unsigned char* inend;
234
374
    unsigned int c, d;
235
374
    int trailing;
236
237
374
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
238
374
    if (in == NULL) {
239
        /*
240
   * initialization nothing to do
241
   */
242
164
  *outlen = 0;
243
164
  *inlen = 0;
244
164
  return(0);
245
164
    }
246
210
    inend = in + (*inlen);
247
210
    outend = out + (*outlen);
248
202k
    while (in < inend) {
249
202k
  d = *in++;
250
202k
  if      (d < 0x80)  { c= d; trailing= 0; }
251
7
  else if (d < 0xC0) {
252
      /* trailing byte in leading position */
253
0
      *outlen = out - outstart;
254
0
      *inlen = processed - instart;
255
0
      return(-2);
256
7
        } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
257
2
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
258
0
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
259
0
  else {
260
      /* no chance for this in Ascii */
261
0
      *outlen = out - outstart;
262
0
      *inlen = processed - instart;
263
0
      return(-2);
264
0
  }
265
266
202k
  if (inend - in < trailing) {
267
0
      break;
268
0
  }
269
270
202k
  for ( ; trailing; trailing--) {
271
9
      if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
272
0
    break;
273
9
      c <<= 6;
274
9
      c |= d & 0x3F;
275
9
  }
276
277
  /* assertion: c is a single UTF-4 value */
278
202k
  if (c < 0x80) {
279
202k
      if (out >= outend)
280
0
    break;
281
202k
      *out++ = c;
282
202k
  } else {
283
      /* no chance for this in Ascii */
284
7
      *outlen = out - outstart;
285
7
      *inlen = processed - instart;
286
7
      return(-2);
287
7
  }
288
202k
  processed = in;
289
202k
    }
290
203
    *outlen = out - outstart;
291
203
    *inlen = processed - instart;
292
203
    return(*outlen);
293
210
}
294
#endif /* LIBXML_OUTPUT_ENABLED */
295
296
/**
297
 * isolat1ToUTF8:
298
 * @out:  a pointer to an array of bytes to store the result
299
 * @outlen:  the length of @out
300
 * @in:  a pointer to an array of ISO Latin 1 chars
301
 * @inlen:  the length of @in
302
 *
303
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
304
 * block of chars out.
305
 * Returns the number of bytes written if success, or -1 otherwise
306
 * The value of @inlen after return is the number of octets consumed
307
 *     if the return value is positive, else unpredictable.
308
 * The value of @outlen after return is the number of octets produced.
309
 */
310
int
311
isolat1ToUTF8(unsigned char* out, int *outlen,
312
307k
              const unsigned char* in, int *inlen) {
313
307k
    unsigned char* outstart = out;
314
307k
    const unsigned char* base = in;
315
307k
    unsigned char* outend;
316
307k
    const unsigned char* inend;
317
307k
    const unsigned char* instop;
318
319
307k
    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
320
0
  return(-1);
321
322
307k
    outend = out + *outlen;
323
307k
    inend = in + (*inlen);
324
307k
    instop = inend;
325
326
3.66M
    while ((in < inend) && (out < outend - 1)) {
327
3.35M
  if (*in >= 0x80) {
328
3.05M
      *out++ = (((*in) >>  6) & 0x1F) | 0xC0;
329
3.05M
            *out++ = ((*in) & 0x3F) | 0x80;
330
3.05M
      ++in;
331
3.05M
  }
332
3.35M
  if ((instop - in) > (outend - out)) instop = in + (outend - out);
333
171M
  while ((in < instop) && (*in < 0x80)) {
334
167M
      *out++ = *in++;
335
167M
  }
336
3.35M
    }
337
307k
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
338
0
        *out++ = *in++;
339
0
    }
340
307k
    *outlen = out - outstart;
341
307k
    *inlen = in - base;
342
307k
    return(*outlen);
343
307k
}
344
345
/**
346
 * UTF8ToUTF8:
347
 * @out:  a pointer to an array of bytes to store the result
348
 * @outlen:  the length of @out
349
 * @inb:  a pointer to an array of UTF-8 chars
350
 * @inlenb:  the length of @in in UTF-8 chars
351
 *
352
 * No op copy operation for UTF8 handling.
353
 *
354
 * Returns the number of bytes written, or -1 if lack of space.
355
 *     The value of *inlen after return is the number of octets consumed
356
 *     if the return value is positive, else unpredictable.
357
 */
358
static int
359
UTF8ToUTF8(unsigned char* out, int *outlen,
360
           const unsigned char* inb, int *inlenb)
361
4.11k
{
362
4.11k
    int len;
363
364
4.11k
    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
365
0
  return(-1);
366
4.11k
    if (inb == NULL) {
367
        /* inb == NULL means output is initialized. */
368
716
        *outlen = 0;
369
716
        *inlenb = 0;
370
716
        return(0);
371
716
    }
372
3.40k
    if (*outlen > *inlenb) {
373
3.40k
  len = *inlenb;
374
3.40k
    } else {
375
0
  len = *outlen;
376
0
    }
377
3.40k
    if (len < 0)
378
0
  return(-1);
379
380
    /*
381
     * FIXME: Conversion functions must assure valid UTF-8, so we have
382
     * to check for UTF-8 validity. Preferably, this converter shouldn't
383
     * be used at all.
384
     */
385
3.40k
    memcpy(out, inb, len);
386
387
3.40k
    *outlen = len;
388
3.40k
    *inlenb = len;
389
3.40k
    return(*outlen);
390
3.40k
}
391
392
393
#ifdef LIBXML_OUTPUT_ENABLED
394
/**
395
 * UTF8Toisolat1:
396
 * @out:  a pointer to an array of bytes to store the result
397
 * @outlen:  the length of @out
398
 * @in:  a pointer to an array of UTF-8 chars
399
 * @inlen:  the length of @in
400
 *
401
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
402
 * block of chars out.
403
 *
404
 * Returns the number of bytes written if success, -2 if the transcoding fails,
405
           or -1 otherwise
406
 * The value of @inlen after return is the number of octets consumed
407
 *     if the return value is positive, else unpredictable.
408
 * The value of @outlen after return is the number of octets produced.
409
 */
410
int
411
UTF8Toisolat1(unsigned char* out, int *outlen,
412
4.52k
              const unsigned char* in, int *inlen) {
413
4.52k
    const unsigned char* processed = in;
414
4.52k
    const unsigned char* outend;
415
4.52k
    const unsigned char* outstart = out;
416
4.52k
    const unsigned char* instart = in;
417
4.52k
    const unsigned char* inend;
418
4.52k
    unsigned int c, d;
419
4.52k
    int trailing;
420
421
4.52k
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
422
4.52k
    if (in == NULL) {
423
        /*
424
   * initialization nothing to do
425
   */
426
1.06k
  *outlen = 0;
427
1.06k
  *inlen = 0;
428
1.06k
  return(0);
429
1.06k
    }
430
3.45k
    inend = in + (*inlen);
431
3.45k
    outend = out + (*outlen);
432
10.4M
    while (in < inend) {
433
10.4M
  d = *in++;
434
10.4M
  if      (d < 0x80)  { c= d; trailing= 0; }
435
592k
  else if (d < 0xC0) {
436
      /* trailing byte in leading position */
437
0
      *outlen = out - outstart;
438
0
      *inlen = processed - instart;
439
0
      return(-2);
440
592k
        } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
441
0
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
442
0
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
443
0
  else {
444
      /* no chance for this in IsoLat1 */
445
0
      *outlen = out - outstart;
446
0
      *inlen = processed - instart;
447
0
      return(-2);
448
0
  }
449
450
10.4M
  if (inend - in < trailing) {
451
141
      break;
452
141
  }
453
454
11.0M
  for ( ; trailing; trailing--) {
455
592k
      if (in >= inend)
456
0
    break;
457
592k
      if (((d= *in++) & 0xC0) != 0x80) {
458
0
    *outlen = out - outstart;
459
0
    *inlen = processed - instart;
460
0
    return(-2);
461
0
      }
462
592k
      c <<= 6;
463
592k
      c |= d & 0x3F;
464
592k
  }
465
466
  /* assertion: c is a single UTF-4 value */
467
10.4M
  if (c <= 0xFF) {
468
10.4M
      if (out >= outend)
469
0
    break;
470
10.4M
      *out++ = c;
471
10.4M
  } else {
472
      /* no chance for this in IsoLat1 */
473
5
      *outlen = out - outstart;
474
5
      *inlen = processed - instart;
475
5
      return(-2);
476
5
  }
477
10.4M
  processed = in;
478
10.4M
    }
479
3.44k
    *outlen = out - outstart;
480
3.44k
    *inlen = processed - instart;
481
3.44k
    return(*outlen);
482
3.45k
}
483
#endif /* LIBXML_OUTPUT_ENABLED */
484
485
/**
486
 * UTF16LEToUTF8:
487
 * @out:  a pointer to an array of bytes to store the result
488
 * @outlen:  the length of @out
489
 * @inb:  a pointer to an array of UTF-16LE passwd as a byte array
490
 * @inlenb:  the length of @in in UTF-16LE chars
491
 *
492
 * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8
493
 * block of chars out. This function assumes the endian property
494
 * is the same between the native type of this machine and the
495
 * inputed one.
496
 *
497
 * Returns the number of bytes written, or -1 if lack of space, or -2
498
 *     if the transcoding fails (if *in is not a valid utf16 string)
499
 *     The value of *inlen after return is the number of octets consumed
500
 *     if the return value is positive, else unpredictable.
501
 */
502
static int
503
UTF16LEToUTF8(unsigned char* out, int *outlen,
504
            const unsigned char* inb, int *inlenb)
505
30.4k
{
506
30.4k
    unsigned char* outstart = out;
507
30.4k
    const unsigned char* processed = inb;
508
30.4k
    unsigned char* outend;
509
30.4k
    unsigned short* in = (unsigned short*) inb;
510
30.4k
    unsigned short* inend;
511
30.4k
    unsigned int c, d, inlen;
512
30.4k
    unsigned char *tmp;
513
30.4k
    int bits;
514
515
30.4k
    if (*outlen == 0) {
516
0
        *inlenb = 0;
517
0
        return(0);
518
0
    }
519
30.4k
    outend = out + *outlen;
520
30.4k
    if ((*inlenb % 2) == 1)
521
3.59k
        (*inlenb)--;
522
30.4k
    inlen = *inlenb / 2;
523
30.4k
    inend = in + inlen;
524
2.22M
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
525
2.19M
        if (xmlLittleEndian) {
526
2.19M
      c= *in++;
527
2.19M
  } else {
528
0
      tmp = (unsigned char *) in;
529
0
      c = *tmp++;
530
0
      c = c | (*tmp << 8);
531
0
      in++;
532
0
  }
533
2.19M
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
534
701
      if (in >= inend) {           /* handle split mutli-byte characters */
535
122
    break;
536
122
      }
537
579
      if (xmlLittleEndian) {
538
579
    d = *in++;
539
579
      } else {
540
0
    tmp = (unsigned char *) in;
541
0
    d = *tmp++;
542
0
    d = d | (*tmp << 8);
543
0
    in++;
544
0
      }
545
579
            if ((d & 0xFC00) == 0xDC00) {
546
62
                c &= 0x03FF;
547
62
                c <<= 10;
548
62
                c |= d & 0x03FF;
549
62
                c += 0x10000;
550
62
            }
551
517
            else {
552
517
    *outlen = out - outstart;
553
517
    *inlenb = processed - inb;
554
517
          return(-2);
555
517
      }
556
579
        }
557
558
  /* assertion: c is a single UTF-4 value */
559
2.19M
        if (out >= outend)
560
0
      break;
561
2.19M
        if      (c <    0x80) {  *out++=  c;                bits= -6; }
562
2.18M
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
563
2.18M
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
564
62
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
565
566
6.56M
        for ( ; bits >= 0; bits-= 6) {
567
4.37M
            if (out >= outend)
568
0
          break;
569
4.37M
            *out++= ((c >> bits) & 0x3F) | 0x80;
570
4.37M
        }
571
2.19M
  processed = (const unsigned char*) in;
572
2.19M
    }
573
29.8k
    *outlen = out - outstart;
574
29.8k
    *inlenb = processed - inb;
575
29.8k
    return(*outlen);
576
30.4k
}
577
578
#ifdef LIBXML_OUTPUT_ENABLED
579
/**
580
 * UTF8ToUTF16LE:
581
 * @outb:  a pointer to an array of bytes to store the result
582
 * @outlen:  the length of @outb
583
 * @in:  a pointer to an array of UTF-8 chars
584
 * @inlen:  the length of @in
585
 *
586
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
587
 * block of chars out.
588
 *
589
 * Returns the number of bytes written, or -1 if lack of space, or -2
590
 *     if the transcoding failed.
591
 */
592
static int
593
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
594
            const unsigned char* in, int *inlen)
595
0
{
596
0
    unsigned short* out = (unsigned short*) outb;
597
0
    const unsigned char* processed = in;
598
0
    const unsigned char *const instart = in;
599
0
    unsigned short* outstart= out;
600
0
    unsigned short* outend;
601
0
    const unsigned char* inend;
602
0
    unsigned int c, d;
603
0
    int trailing;
604
0
    unsigned char *tmp;
605
0
    unsigned short tmp1, tmp2;
606
607
    /* UTF16LE encoding has no BOM */
608
0
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
609
0
    if (in == NULL) {
610
0
  *outlen = 0;
611
0
  *inlen = 0;
612
0
  return(0);
613
0
    }
614
0
    inend= in + *inlen;
615
0
    outend = out + (*outlen / 2);
616
0
    while (in < inend) {
617
0
      d= *in++;
618
0
      if      (d < 0x80)  { c= d; trailing= 0; }
619
0
      else if (d < 0xC0) {
620
          /* trailing byte in leading position */
621
0
    *outlen = (out - outstart) * 2;
622
0
    *inlen = processed - instart;
623
0
    return(-2);
624
0
      } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
625
0
      else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
626
0
      else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
627
0
      else {
628
  /* no chance for this in UTF-16 */
629
0
  *outlen = (out - outstart) * 2;
630
0
  *inlen = processed - instart;
631
0
  return(-2);
632
0
      }
633
634
0
      if (inend - in < trailing) {
635
0
          break;
636
0
      }
637
638
0
      for ( ; trailing; trailing--) {
639
0
          if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
640
0
        break;
641
0
          c <<= 6;
642
0
          c |= d & 0x3F;
643
0
      }
644
645
      /* assertion: c is a single UTF-4 value */
646
0
        if (c < 0x10000) {
647
0
            if (out >= outend)
648
0
          break;
649
0
      if (xmlLittleEndian) {
650
0
    *out++ = c;
651
0
      } else {
652
0
    tmp = (unsigned char *) out;
653
0
    *tmp = (unsigned char) c; /* Explicit truncation */
654
0
    *(tmp + 1) = c >> 8 ;
655
0
    out++;
656
0
      }
657
0
        }
658
0
        else if (c < 0x110000) {
659
0
            if (out+1 >= outend)
660
0
          break;
661
0
            c -= 0x10000;
662
0
      if (xmlLittleEndian) {
663
0
    *out++ = 0xD800 | (c >> 10);
664
0
    *out++ = 0xDC00 | (c & 0x03FF);
665
0
      } else {
666
0
    tmp1 = 0xD800 | (c >> 10);
667
0
    tmp = (unsigned char *) out;
668
0
    *tmp = (unsigned char) tmp1; /* Explicit truncation */
669
0
    *(tmp + 1) = tmp1 >> 8;
670
0
    out++;
671
672
0
    tmp2 = 0xDC00 | (c & 0x03FF);
673
0
    tmp = (unsigned char *) out;
674
0
    *tmp  = (unsigned char) tmp2; /* Explicit truncation */
675
0
    *(tmp + 1) = tmp2 >> 8;
676
0
    out++;
677
0
      }
678
0
        }
679
0
        else
680
0
      break;
681
0
  processed = in;
682
0
    }
683
0
    *outlen = (out - outstart) * 2;
684
0
    *inlen = processed - instart;
685
0
    return(*outlen);
686
0
}
687
688
/**
689
 * UTF8ToUTF16:
690
 * @outb:  a pointer to an array of bytes to store the result
691
 * @outlen:  the length of @outb
692
 * @in:  a pointer to an array of UTF-8 chars
693
 * @inlen:  the length of @in
694
 *
695
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
696
 * block of chars out.
697
 *
698
 * Returns the number of bytes written, or -1 if lack of space, or -2
699
 *     if the transcoding failed.
700
 */
701
static int
702
UTF8ToUTF16(unsigned char* outb, int *outlen,
703
            const unsigned char* in, int *inlen)
704
0
{
705
0
    if (in == NULL) {
706
  /*
707
   * initialization, add the Byte Order Mark for UTF-16LE
708
   */
709
0
        if (*outlen >= 2) {
710
0
      outb[0] = 0xFF;
711
0
      outb[1] = 0xFE;
712
0
      *outlen = 2;
713
0
      *inlen = 0;
714
#ifdef DEBUG_ENCODING
715
            xmlGenericError(xmlGenericErrorContext,
716
        "Added FFFE Byte Order Mark\n");
717
#endif
718
0
      return(2);
719
0
  }
720
0
  *outlen = 0;
721
0
  *inlen = 0;
722
0
  return(0);
723
0
    }
724
0
    return (UTF8ToUTF16LE(outb, outlen, in, inlen));
725
0
}
726
#endif /* LIBXML_OUTPUT_ENABLED */
727
728
/**
729
 * UTF16BEToUTF8:
730
 * @out:  a pointer to an array of bytes to store the result
731
 * @outlen:  the length of @out
732
 * @inb:  a pointer to an array of UTF-16 passed as a byte array
733
 * @inlenb:  the length of @in in UTF-16 chars
734
 *
735
 * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
736
 * block of chars out. This function assumes the endian property
737
 * is the same between the native type of this machine and the
738
 * inputed one.
739
 *
740
 * Returns the number of bytes written, or -1 if lack of space, or -2
741
 *     if the transcoding fails (if *in is not a valid utf16 string)
742
 * The value of *inlen after return is the number of octets consumed
743
 *     if the return value is positive, else unpredictable.
744
 */
745
static int
746
UTF16BEToUTF8(unsigned char* out, int *outlen,
747
            const unsigned char* inb, int *inlenb)
748
5.62k
{
749
5.62k
    unsigned char* outstart = out;
750
5.62k
    const unsigned char* processed = inb;
751
5.62k
    unsigned char* outend;
752
5.62k
    unsigned short* in = (unsigned short*) inb;
753
5.62k
    unsigned short* inend;
754
5.62k
    unsigned int c, d, inlen;
755
5.62k
    unsigned char *tmp;
756
5.62k
    int bits;
757
758
5.62k
    if (*outlen == 0) {
759
0
        *inlenb = 0;
760
0
        return(0);
761
0
    }
762
5.62k
    outend = out + *outlen;
763
5.62k
    if ((*inlenb % 2) == 1)
764
2.73k
        (*inlenb)--;
765
5.62k
    inlen = *inlenb / 2;
766
5.62k
    inend= in + inlen;
767
220k
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
768
215k
  if (xmlLittleEndian) {
769
215k
      tmp = (unsigned char *) in;
770
215k
      c = *tmp++;
771
215k
      c = (c << 8) | *tmp;
772
215k
      in++;
773
215k
  } else {
774
0
      c= *in++;
775
0
  }
776
215k
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
777
852
      if (in >= inend) {           /* handle split mutli-byte characters */
778
584
                break;
779
584
      }
780
268
      if (xmlLittleEndian) {
781
268
    tmp = (unsigned char *) in;
782
268
    d = *tmp++;
783
268
    d = (d << 8) | *tmp;
784
268
    in++;
785
268
      } else {
786
0
    d= *in++;
787
0
      }
788
268
            if ((d & 0xFC00) == 0xDC00) {
789
45
                c &= 0x03FF;
790
45
                c <<= 10;
791
45
                c |= d & 0x03FF;
792
45
                c += 0x10000;
793
45
            }
794
223
            else {
795
223
    *outlen = out - outstart;
796
223
    *inlenb = processed - inb;
797
223
          return(-2);
798
223
      }
799
268
        }
800
801
  /* assertion: c is a single UTF-4 value */
802
214k
        if (out >= outend)
803
0
      break;
804
214k
        if      (c <    0x80) {  *out++=  c;                bits= -6; }
805
210k
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
806
208k
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
807
45
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
808
809
633k
        for ( ; bits >= 0; bits-= 6) {
810
419k
            if (out >= outend)
811
0
          break;
812
419k
            *out++= ((c >> bits) & 0x3F) | 0x80;
813
419k
        }
814
214k
  processed = (const unsigned char*) in;
815
214k
    }
816
5.40k
    *outlen = out - outstart;
817
5.40k
    *inlenb = processed - inb;
818
5.40k
    return(*outlen);
819
5.62k
}
820
821
#ifdef LIBXML_OUTPUT_ENABLED
822
/**
823
 * UTF8ToUTF16BE:
824
 * @outb:  a pointer to an array of bytes to store the result
825
 * @outlen:  the length of @outb
826
 * @in:  a pointer to an array of UTF-8 chars
827
 * @inlen:  the length of @in
828
 *
829
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
830
 * block of chars out.
831
 *
832
 * Returns the number of byte written, or -1 by lack of space, or -2
833
 *     if the transcoding failed.
834
 */
835
static int
836
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
837
            const unsigned char* in, int *inlen)
838
28
{
839
28
    unsigned short* out = (unsigned short*) outb;
840
28
    const unsigned char* processed = in;
841
28
    const unsigned char *const instart = in;
842
28
    unsigned short* outstart= out;
843
28
    unsigned short* outend;
844
28
    const unsigned char* inend;
845
28
    unsigned int c, d;
846
28
    int trailing;
847
28
    unsigned char *tmp;
848
28
    unsigned short tmp1, tmp2;
849
850
    /* UTF-16BE has no BOM */
851
28
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
852
28
    if (in == NULL) {
853
14
  *outlen = 0;
854
14
  *inlen = 0;
855
14
  return(0);
856
14
    }
857
14
    inend= in + *inlen;
858
14
    outend = out + (*outlen / 2);
859
602
    while (in < inend) {
860
588
      d= *in++;
861
588
      if      (d < 0x80)  { c= d; trailing= 0; }
862
0
      else if (d < 0xC0)  {
863
          /* trailing byte in leading position */
864
0
    *outlen = out - outstart;
865
0
    *inlen = processed - instart;
866
0
    return(-2);
867
0
      } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
868
0
      else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
869
0
      else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
870
0
      else {
871
          /* no chance for this in UTF-16 */
872
0
    *outlen = out - outstart;
873
0
    *inlen = processed - instart;
874
0
    return(-2);
875
0
      }
876
877
588
      if (inend - in < trailing) {
878
0
          break;
879
0
      }
880
881
588
      for ( ; trailing; trailing--) {
882
0
          if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))  break;
883
0
          c <<= 6;
884
0
          c |= d & 0x3F;
885
0
      }
886
887
      /* assertion: c is a single UTF-4 value */
888
588
        if (c < 0x10000) {
889
588
            if (out >= outend)  break;
890
588
      if (xmlLittleEndian) {
891
588
    tmp = (unsigned char *) out;
892
588
    *tmp = c >> 8;
893
588
    *(tmp + 1) = (unsigned char) c; /* Explicit truncation */
894
588
    out++;
895
588
      } else {
896
0
    *out++ = c;
897
0
      }
898
588
        }
899
0
        else if (c < 0x110000) {
900
0
            if (out+1 >= outend)  break;
901
0
            c -= 0x10000;
902
0
      if (xmlLittleEndian) {
903
0
    tmp1 = 0xD800 | (c >> 10);
904
0
    tmp = (unsigned char *) out;
905
0
    *tmp = tmp1 >> 8;
906
0
    *(tmp + 1) = (unsigned char) tmp1; /* Explicit truncation */
907
0
    out++;
908
909
0
    tmp2 = 0xDC00 | (c & 0x03FF);
910
0
    tmp = (unsigned char *) out;
911
0
    *tmp = tmp2 >> 8;
912
0
    *(tmp + 1) = (unsigned char) tmp2; /* Explicit truncation */
913
0
    out++;
914
0
      } else {
915
0
    *out++ = 0xD800 | (c >> 10);
916
0
    *out++ = 0xDC00 | (c & 0x03FF);
917
0
      }
918
0
        }
919
0
        else
920
0
      break;
921
588
  processed = in;
922
588
    }
923
14
    *outlen = (out - outstart) * 2;
924
14
    *inlen = processed - instart;
925
14
    return(*outlen);
926
14
}
927
#endif /* LIBXML_OUTPUT_ENABLED */
928
929
/************************************************************************
930
 *                  *
931
 *    Generic encoding handling routines      *
932
 *                  *
933
 ************************************************************************/
934
935
/**
936
 * xmlDetectCharEncoding:
937
 * @in:  a pointer to the first bytes of the XML entity, must be at least
938
 *       2 bytes long (at least 4 if encoding is UTF4 variant).
939
 * @len:  pointer to the length of the buffer
940
 *
941
 * Guess the encoding of the entity using the first bytes of the entity content
942
 * according to the non-normative appendix F of the XML-1.0 recommendation.
943
 *
944
 * Returns one of the XML_CHAR_ENCODING_... values.
945
 */
946
xmlCharEncoding
947
xmlDetectCharEncoding(const unsigned char* in, int len)
948
125k
{
949
125k
    if (in == NULL)
950
0
        return(XML_CHAR_ENCODING_NONE);
951
125k
    if (len >= 4) {
952
125k
  if ((in[0] == 0x00) && (in[1] == 0x00) &&
953
125k
      (in[2] == 0x00) && (in[3] == 0x3C))
954
153
      return(XML_CHAR_ENCODING_UCS4BE);
955
125k
  if ((in[0] == 0x3C) && (in[1] == 0x00) &&
956
125k
      (in[2] == 0x00) && (in[3] == 0x00))
957
194
      return(XML_CHAR_ENCODING_UCS4LE);
958
125k
  if ((in[0] == 0x00) && (in[1] == 0x00) &&
959
125k
      (in[2] == 0x3C) && (in[3] == 0x00))
960
12
      return(XML_CHAR_ENCODING_UCS4_2143);
961
125k
  if ((in[0] == 0x00) && (in[1] == 0x3C) &&
962
125k
      (in[2] == 0x00) && (in[3] == 0x00))
963
27
      return(XML_CHAR_ENCODING_UCS4_3412);
964
125k
  if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
965
125k
      (in[2] == 0xA7) && (in[3] == 0x94))
966
0
      return(XML_CHAR_ENCODING_EBCDIC);
967
125k
  if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
968
125k
      (in[2] == 0x78) && (in[3] == 0x6D))
969
58.7k
      return(XML_CHAR_ENCODING_UTF8);
970
  /*
971
   * Although not part of the recommendation, we also
972
   * attempt an "auto-recognition" of UTF-16LE and
973
   * UTF-16BE encodings.
974
   */
975
66.4k
  if ((in[0] == 0x3C) && (in[1] == 0x00) &&
976
66.4k
      (in[2] == 0x3F) && (in[3] == 0x00))
977
372
      return(XML_CHAR_ENCODING_UTF16LE);
978
66.0k
  if ((in[0] == 0x00) && (in[1] == 0x3C) &&
979
66.0k
      (in[2] == 0x00) && (in[3] == 0x3F))
980
0
      return(XML_CHAR_ENCODING_UTF16BE);
981
66.0k
    }
982
66.0k
    if (len >= 3) {
983
  /*
984
   * Errata on XML-1.0 June 20 2001
985
   * We now allow an UTF8 encoded BOM
986
   */
987
66.0k
  if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
988
66.0k
      (in[2] == 0xBF))
989
204
      return(XML_CHAR_ENCODING_UTF8);
990
66.0k
    }
991
    /* For UTF-16 we can recognize by the BOM */
992
65.8k
    if (len >= 2) {
993
65.8k
  if ((in[0] == 0xFE) && (in[1] == 0xFF))
994
729
      return(XML_CHAR_ENCODING_UTF16BE);
995
65.1k
  if ((in[0] == 0xFF) && (in[1] == 0xFE))
996
967
      return(XML_CHAR_ENCODING_UTF16LE);
997
65.1k
    }
998
64.1k
    return(XML_CHAR_ENCODING_NONE);
999
65.8k
}
1000
1001
/**
1002
 * xmlCleanupEncodingAliases:
1003
 *
1004
 * Unregisters all aliases
1005
 */
1006
void
1007
0
xmlCleanupEncodingAliases(void) {
1008
0
    int i;
1009
1010
0
    if (xmlCharEncodingAliases == NULL)
1011
0
  return;
1012
1013
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1014
0
  if (xmlCharEncodingAliases[i].name != NULL)
1015
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1016
0
  if (xmlCharEncodingAliases[i].alias != NULL)
1017
0
      xmlFree((char *) xmlCharEncodingAliases[i].alias);
1018
0
    }
1019
0
    xmlCharEncodingAliasesNb = 0;
1020
0
    xmlCharEncodingAliasesMax = 0;
1021
0
    xmlFree(xmlCharEncodingAliases);
1022
0
    xmlCharEncodingAliases = NULL;
1023
0
}
1024
1025
/**
1026
 * xmlGetEncodingAlias:
1027
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1028
 *
1029
 * Lookup an encoding name for the given alias.
1030
 *
1031
 * Returns NULL if not found, otherwise the original name
1032
 */
1033
const char *
1034
14.1k
xmlGetEncodingAlias(const char *alias) {
1035
14.1k
    int i;
1036
14.1k
    char upper[100];
1037
1038
14.1k
    if (alias == NULL)
1039
0
  return(NULL);
1040
1041
14.1k
    if (xmlCharEncodingAliases == NULL)
1042
14.1k
  return(NULL);
1043
1044
0
    for (i = 0;i < 99;i++) {
1045
0
        upper[i] = toupper(alias[i]);
1046
0
  if (upper[i] == 0) break;
1047
0
    }
1048
0
    upper[i] = 0;
1049
1050
    /*
1051
     * Walk down the list looking for a definition of the alias
1052
     */
1053
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1054
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1055
0
      return(xmlCharEncodingAliases[i].name);
1056
0
  }
1057
0
    }
1058
0
    return(NULL);
1059
0
}
1060
1061
/**
1062
 * xmlAddEncodingAlias:
1063
 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1064
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1065
 *
1066
 * Registers an alias @alias for an encoding named @name. Existing alias
1067
 * will be overwritten.
1068
 *
1069
 * Returns 0 in case of success, -1 in case of error
1070
 */
1071
int
1072
0
xmlAddEncodingAlias(const char *name, const char *alias) {
1073
0
    int i;
1074
0
    char upper[100];
1075
1076
0
    if ((name == NULL) || (alias == NULL))
1077
0
  return(-1);
1078
1079
0
    for (i = 0;i < 99;i++) {
1080
0
        upper[i] = toupper(alias[i]);
1081
0
  if (upper[i] == 0) break;
1082
0
    }
1083
0
    upper[i] = 0;
1084
1085
0
    if (xmlCharEncodingAliases == NULL) {
1086
0
  xmlCharEncodingAliasesNb = 0;
1087
0
  xmlCharEncodingAliasesMax = 20;
1088
0
  xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1089
0
        xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1090
0
  if (xmlCharEncodingAliases == NULL)
1091
0
      return(-1);
1092
0
    } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1093
0
  xmlCharEncodingAliasesMax *= 2;
1094
0
  xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1095
0
        xmlRealloc(xmlCharEncodingAliases,
1096
0
             xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1097
0
    }
1098
    /*
1099
     * Walk down the list looking for a definition of the alias
1100
     */
1101
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1102
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1103
      /*
1104
       * Replace the definition.
1105
       */
1106
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1107
0
      xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1108
0
      return(0);
1109
0
  }
1110
0
    }
1111
    /*
1112
     * Add the definition
1113
     */
1114
0
    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1115
0
    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1116
0
    xmlCharEncodingAliasesNb++;
1117
0
    return(0);
1118
0
}
1119
1120
/**
1121
 * xmlDelEncodingAlias:
1122
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1123
 *
1124
 * Unregisters an encoding alias @alias
1125
 *
1126
 * Returns 0 in case of success, -1 in case of error
1127
 */
1128
int
1129
0
xmlDelEncodingAlias(const char *alias) {
1130
0
    int i;
1131
1132
0
    if (alias == NULL)
1133
0
  return(-1);
1134
1135
0
    if (xmlCharEncodingAliases == NULL)
1136
0
  return(-1);
1137
    /*
1138
     * Walk down the list looking for a definition of the alias
1139
     */
1140
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1141
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1142
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1143
0
      xmlFree((char *) xmlCharEncodingAliases[i].alias);
1144
0
      xmlCharEncodingAliasesNb--;
1145
0
      memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1146
0
        sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1147
0
      return(0);
1148
0
  }
1149
0
    }
1150
0
    return(-1);
1151
0
}
1152
1153
/**
1154
 * xmlParseCharEncoding:
1155
 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1156
 *
1157
 * Compare the string to the encoding schemes already known. Note
1158
 * that the comparison is case insensitive accordingly to the section
1159
 * [XML] 4.3.3 Character Encoding in Entities.
1160
 *
1161
 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1162
 * if not recognized.
1163
 */
1164
xmlCharEncoding
1165
xmlParseCharEncoding(const char* name)
1166
16.6k
{
1167
16.6k
    const char *alias;
1168
16.6k
    char upper[500];
1169
16.6k
    int i;
1170
1171
16.6k
    if (name == NULL)
1172
13.9k
  return(XML_CHAR_ENCODING_NONE);
1173
1174
    /*
1175
     * Do the alias resolution
1176
     */
1177
2.62k
    alias = xmlGetEncodingAlias(name);
1178
2.62k
    if (alias != NULL)
1179
0
  name = alias;
1180
1181
32.4k
    for (i = 0;i < 499;i++) {
1182
32.4k
        upper[i] = toupper(name[i]);
1183
32.4k
  if (upper[i] == 0) break;
1184
32.4k
    }
1185
2.62k
    upper[i] = 0;
1186
1187
2.62k
    if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1188
2.62k
    if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1189
1.91k
    if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1190
1191
    /*
1192
     * NOTE: if we were able to parse this, the endianness of UTF16 is
1193
     *       already found and in use
1194
     */
1195
1.91k
    if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1196
1.91k
    if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1197
1198
1.91k
    if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1199
1.91k
    if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1200
1.91k
    if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1201
1202
    /*
1203
     * NOTE: if we were able to parse this, the endianness of UCS4 is
1204
     *       already found and in use
1205
     */
1206
1.91k
    if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1207
1.46k
    if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1208
1.46k
    if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1209
1210
1211
1.46k
    if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1212
394
    if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1213
394
    if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1214
1215
394
    if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1216
393
    if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1217
393
    if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1218
1219
393
    if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1220
392
    if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1221
390
    if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1222
379
    if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1223
367
    if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1224
357
    if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1225
354
    if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1226
1227
341
    if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1228
341
    if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1229
341
    if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1230
1231
#ifdef DEBUG_ENCODING
1232
    xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1233
#endif
1234
341
    return(XML_CHAR_ENCODING_ERROR);
1235
341
}
1236
1237
/**
1238
 * xmlGetCharEncodingName:
1239
 * @enc:  the encoding
1240
 *
1241
 * The "canonical" name for XML encoding.
1242
 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1243
 * Section 4.3.3  Character Encoding in Entities
1244
 *
1245
 * Returns the canonical name for the given encoding
1246
 */
1247
1248
const char*
1249
489
xmlGetCharEncodingName(xmlCharEncoding enc) {
1250
489
    switch (enc) {
1251
0
        case XML_CHAR_ENCODING_ERROR:
1252
0
      return(NULL);
1253
0
        case XML_CHAR_ENCODING_NONE:
1254
0
      return(NULL);
1255
0
        case XML_CHAR_ENCODING_UTF8:
1256
0
      return("UTF-8");
1257
0
        case XML_CHAR_ENCODING_UTF16LE:
1258
0
      return("UTF-16");
1259
0
        case XML_CHAR_ENCODING_UTF16BE:
1260
0
      return("UTF-16");
1261
0
        case XML_CHAR_ENCODING_EBCDIC:
1262
0
            return("EBCDIC");
1263
450
        case XML_CHAR_ENCODING_UCS4LE:
1264
450
            return("ISO-10646-UCS-4");
1265
0
        case XML_CHAR_ENCODING_UCS4BE:
1266
0
            return("ISO-10646-UCS-4");
1267
12
        case XML_CHAR_ENCODING_UCS4_2143:
1268
12
            return("ISO-10646-UCS-4");
1269
27
        case XML_CHAR_ENCODING_UCS4_3412:
1270
27
            return("ISO-10646-UCS-4");
1271
0
        case XML_CHAR_ENCODING_UCS2:
1272
0
            return("ISO-10646-UCS-2");
1273
0
        case XML_CHAR_ENCODING_8859_1:
1274
0
      return("ISO-8859-1");
1275
0
        case XML_CHAR_ENCODING_8859_2:
1276
0
      return("ISO-8859-2");
1277
0
        case XML_CHAR_ENCODING_8859_3:
1278
0
      return("ISO-8859-3");
1279
0
        case XML_CHAR_ENCODING_8859_4:
1280
0
      return("ISO-8859-4");
1281
0
        case XML_CHAR_ENCODING_8859_5:
1282
0
      return("ISO-8859-5");
1283
0
        case XML_CHAR_ENCODING_8859_6:
1284
0
      return("ISO-8859-6");
1285
0
        case XML_CHAR_ENCODING_8859_7:
1286
0
      return("ISO-8859-7");
1287
0
        case XML_CHAR_ENCODING_8859_8:
1288
0
      return("ISO-8859-8");
1289
0
        case XML_CHAR_ENCODING_8859_9:
1290
0
      return("ISO-8859-9");
1291
0
        case XML_CHAR_ENCODING_2022_JP:
1292
0
            return("ISO-2022-JP");
1293
0
        case XML_CHAR_ENCODING_SHIFT_JIS:
1294
0
            return("Shift-JIS");
1295
0
        case XML_CHAR_ENCODING_EUC_JP:
1296
0
            return("EUC-JP");
1297
0
  case XML_CHAR_ENCODING_ASCII:
1298
0
      return(NULL);
1299
489
    }
1300
0
    return(NULL);
1301
489
}
1302
1303
/************************************************************************
1304
 *                  *
1305
 *      Char encoding handlers        *
1306
 *                  *
1307
 ************************************************************************/
1308
1309
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) && \
1310
    defined(LIBXML_ISO8859X_ENABLED)
1311
1312
#define DECLARE_ISO_FUNCS(n) \
1313
    static int ISO8859_##n##ToUTF8(unsigned char* out, int *outlen, \
1314
                                   const unsigned char* in, int *inlen); \
1315
    static int UTF8ToISO8859_##n(unsigned char* out, int *outlen, \
1316
                                 const unsigned char* in, int *inlen);
1317
1318
/** DOC_DISABLE */
1319
DECLARE_ISO_FUNCS(2)
1320
DECLARE_ISO_FUNCS(3)
1321
DECLARE_ISO_FUNCS(4)
1322
DECLARE_ISO_FUNCS(5)
1323
DECLARE_ISO_FUNCS(6)
1324
DECLARE_ISO_FUNCS(7)
1325
DECLARE_ISO_FUNCS(8)
1326
DECLARE_ISO_FUNCS(9)
1327
DECLARE_ISO_FUNCS(10)
1328
DECLARE_ISO_FUNCS(11)
1329
DECLARE_ISO_FUNCS(13)
1330
DECLARE_ISO_FUNCS(14)
1331
DECLARE_ISO_FUNCS(15)
1332
DECLARE_ISO_FUNCS(16)
1333
/** DOC_ENABLE */
1334
1335
#endif /* LIBXML_ISO8859X_ENABLED */
1336
1337
#ifdef LIBXML_ICONV_ENABLED
1338
  #define EMPTY_ICONV , (iconv_t) 0, (iconv_t) 0
1339
#else
1340
  #define EMPTY_ICONV
1341
#endif
1342
1343
#ifdef LIBXML_UCONV_ENABLED
1344
  #define EMPTY_UCONV , NULL, NULL
1345
#else
1346
  #define EMPTY_UCONV
1347
#endif
1348
1349
#define MAKE_HANDLER(name, in, out) \
1350
    { (char *) name, in, out EMPTY_ICONV EMPTY_UCONV }
1351
1352
static const xmlCharEncodingHandler defaultHandlers[] = {
1353
    MAKE_HANDLER("UTF-8", UTF8ToUTF8, UTF8ToUTF8)
1354
#ifdef LIBXML_OUTPUT_ENABLED
1355
    ,MAKE_HANDLER("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE)
1356
    ,MAKE_HANDLER("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE)
1357
    ,MAKE_HANDLER("UTF-16", UTF16LEToUTF8, UTF8ToUTF16)
1358
    ,MAKE_HANDLER("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1)
1359
    ,MAKE_HANDLER("ASCII", asciiToUTF8, UTF8Toascii)
1360
    ,MAKE_HANDLER("US-ASCII", asciiToUTF8, UTF8Toascii)
1361
#ifdef LIBXML_HTML_ENABLED
1362
    ,MAKE_HANDLER("HTML", NULL, UTF8ToHtml)
1363
#endif
1364
#else
1365
    ,MAKE_HANDLER("UTF-16LE", UTF16LEToUTF8, NULL)
1366
    ,MAKE_HANDLER("UTF-16BE", UTF16BEToUTF8, NULL)
1367
    ,MAKE_HANDLER("UTF-16", UTF16LEToUTF8, NULL)
1368
    ,MAKE_HANDLER("ISO-8859-1", isolat1ToUTF8, NULL)
1369
    ,MAKE_HANDLER("ASCII", asciiToUTF8, NULL)
1370
    ,MAKE_HANDLER("US-ASCII", asciiToUTF8, NULL)
1371
#endif /* LIBXML_OUTPUT_ENABLED */
1372
1373
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) && \
1374
    defined(LIBXML_ISO8859X_ENABLED)
1375
    ,MAKE_HANDLER("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2)
1376
    ,MAKE_HANDLER("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3)
1377
    ,MAKE_HANDLER("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4)
1378
    ,MAKE_HANDLER("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5)
1379
    ,MAKE_HANDLER("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6)
1380
    ,MAKE_HANDLER("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7)
1381
    ,MAKE_HANDLER("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8)
1382
    ,MAKE_HANDLER("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9)
1383
    ,MAKE_HANDLER("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10)
1384
    ,MAKE_HANDLER("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11)
1385
    ,MAKE_HANDLER("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13)
1386
    ,MAKE_HANDLER("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14)
1387
    ,MAKE_HANDLER("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15)
1388
    ,MAKE_HANDLER("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16)
1389
#endif
1390
};
1391
1392
#define NUM_DEFAULT_HANDLERS \
1393
125k
    (sizeof(defaultHandlers) / sizeof(defaultHandlers[0]))
1394
1395
static const xmlCharEncodingHandler *xmlUTF16LEHandler = &defaultHandlers[1];
1396
static const xmlCharEncodingHandler *xmlUTF16BEHandler = &defaultHandlers[2];
1397
1398
/* the size should be growable, but it's not a big deal ... */
1399
0
#define MAX_ENCODING_HANDLERS 50
1400
static xmlCharEncodingHandlerPtr *handlers = NULL;
1401
static int nbCharEncodingHandler = 0;
1402
1403
/**
1404
 * xmlNewCharEncodingHandler:
1405
 * @name:  the encoding name, in UTF-8 format (ASCII actually)
1406
 * @input:  the xmlCharEncodingInputFunc to read that encoding
1407
 * @output:  the xmlCharEncodingOutputFunc to write that encoding
1408
 *
1409
 * Create and registers an xmlCharEncodingHandler.
1410
 *
1411
 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1412
 */
1413
xmlCharEncodingHandlerPtr
1414
xmlNewCharEncodingHandler(const char *name,
1415
                          xmlCharEncodingInputFunc input,
1416
0
                          xmlCharEncodingOutputFunc output) {
1417
0
    xmlCharEncodingHandlerPtr handler;
1418
0
    const char *alias;
1419
0
    char upper[500];
1420
0
    int i;
1421
0
    char *up = NULL;
1422
1423
    /*
1424
     * Do the alias resolution
1425
     */
1426
0
    alias = xmlGetEncodingAlias(name);
1427
0
    if (alias != NULL)
1428
0
  name = alias;
1429
1430
    /*
1431
     * Keep only the uppercase version of the encoding.
1432
     */
1433
0
    if (name == NULL) {
1434
0
        xmlEncodingErr(XML_I18N_NO_NAME,
1435
0
           "xmlNewCharEncodingHandler : no name !\n", NULL);
1436
0
  return(NULL);
1437
0
    }
1438
0
    for (i = 0;i < 499;i++) {
1439
0
        upper[i] = toupper(name[i]);
1440
0
  if (upper[i] == 0) break;
1441
0
    }
1442
0
    upper[i] = 0;
1443
0
    up = xmlMemStrdup(upper);
1444
0
    if (up == NULL) {
1445
0
        xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1446
0
  return(NULL);
1447
0
    }
1448
1449
    /*
1450
     * allocate and fill-up an handler block.
1451
     */
1452
0
    handler = (xmlCharEncodingHandlerPtr)
1453
0
              xmlMalloc(sizeof(xmlCharEncodingHandler));
1454
0
    if (handler == NULL) {
1455
0
        xmlFree(up);
1456
0
        xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1457
0
  return(NULL);
1458
0
    }
1459
0
    memset(handler, 0, sizeof(xmlCharEncodingHandler));
1460
0
    handler->input = input;
1461
0
    handler->output = output;
1462
0
    handler->name = up;
1463
1464
0
#ifdef LIBXML_ICONV_ENABLED
1465
0
    handler->iconv_in = NULL;
1466
0
    handler->iconv_out = NULL;
1467
0
#endif
1468
#ifdef LIBXML_ICU_ENABLED
1469
    handler->uconv_in = NULL;
1470
    handler->uconv_out = NULL;
1471
#endif
1472
1473
    /*
1474
     * registers and returns the handler.
1475
     */
1476
0
    xmlRegisterCharEncodingHandler(handler);
1477
#ifdef DEBUG_ENCODING
1478
    xmlGenericError(xmlGenericErrorContext,
1479
      "Registered encoding handler for %s\n", name);
1480
#endif
1481
0
    return(handler);
1482
0
}
1483
1484
/**
1485
 * xmlInitCharEncodingHandlers:
1486
 *
1487
 * DEPRECATED: Alias for xmlInitParser.
1488
 */
1489
void
1490
0
xmlInitCharEncodingHandlers(void) {
1491
0
    xmlInitParser();
1492
0
}
1493
1494
/**
1495
 * xmlInitEncodingInternal:
1496
 *
1497
 * Initialize the char encoding support.
1498
 */
1499
void
1500
29
xmlInitEncodingInternal(void) {
1501
29
    unsigned short int tst = 0x1234;
1502
29
    unsigned char *ptr = (unsigned char *) &tst;
1503
1504
29
    if (*ptr == 0x12) xmlLittleEndian = 0;
1505
29
    else if (*ptr == 0x34) xmlLittleEndian = 1;
1506
0
    else {
1507
0
        xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1508
0
                 "Odd problem at endianness detection\n", NULL);
1509
0
    }
1510
29
}
1511
1512
/**
1513
 * xmlCleanupCharEncodingHandlers:
1514
 *
1515
 * DEPRECATED: This function will be made private. Call xmlCleanupParser
1516
 * to free global state but see the warnings there. xmlCleanupParser
1517
 * should be only called once at program exit. In most cases, you don't
1518
 * have call cleanup functions at all.
1519
 *
1520
 * Cleanup the memory allocated for the char encoding support, it
1521
 * unregisters all the encoding handlers and the aliases.
1522
 */
1523
void
1524
0
xmlCleanupCharEncodingHandlers(void) {
1525
0
    xmlCleanupEncodingAliases();
1526
1527
0
    if (handlers == NULL) return;
1528
1529
0
    for (;nbCharEncodingHandler > 0;) {
1530
0
        nbCharEncodingHandler--;
1531
0
  if (handlers[nbCharEncodingHandler] != NULL) {
1532
0
      if (handlers[nbCharEncodingHandler]->name != NULL)
1533
0
    xmlFree(handlers[nbCharEncodingHandler]->name);
1534
0
      xmlFree(handlers[nbCharEncodingHandler]);
1535
0
  }
1536
0
    }
1537
0
    xmlFree(handlers);
1538
0
    handlers = NULL;
1539
0
    nbCharEncodingHandler = 0;
1540
0
}
1541
1542
/**
1543
 * xmlRegisterCharEncodingHandler:
1544
 * @handler:  the xmlCharEncodingHandlerPtr handler block
1545
 *
1546
 * Register the char encoding handler, surprising, isn't it ?
1547
 */
1548
void
1549
0
xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1550
0
    if (handler == NULL) {
1551
0
        xmlEncodingErr(XML_I18N_NO_HANDLER,
1552
0
    "xmlRegisterCharEncodingHandler: NULL handler\n", NULL);
1553
0
        return;
1554
0
    }
1555
0
    if (handlers == NULL) {
1556
0
        handlers = xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(handlers[0]));
1557
0
        if (handlers == NULL) {
1558
0
            xmlEncodingErrMemory("allocating handler table");
1559
0
            goto free_handler;
1560
0
        }
1561
0
    }
1562
1563
0
    if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1564
0
        xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1565
0
  "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1566
0
                 "MAX_ENCODING_HANDLERS");
1567
0
        goto free_handler;
1568
0
    }
1569
0
    handlers[nbCharEncodingHandler++] = handler;
1570
0
    return;
1571
1572
0
free_handler:
1573
0
    if (handler != NULL) {
1574
0
        if (handler->name != NULL) {
1575
0
            xmlFree(handler->name);
1576
0
        }
1577
0
        xmlFree(handler);
1578
0
    }
1579
0
}
1580
1581
/**
1582
 * xmlGetCharEncodingHandler:
1583
 * @enc:  an xmlCharEncoding value.
1584
 *
1585
 * Search in the registered set the handler able to read/write that encoding.
1586
 *
1587
 * Returns the handler or NULL if not found
1588
 */
1589
xmlCharEncodingHandlerPtr
1590
165k
xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1591
165k
    xmlCharEncodingHandlerPtr handler;
1592
1593
165k
    switch (enc) {
1594
0
        case XML_CHAR_ENCODING_ERROR:
1595
0
      return(NULL);
1596
142k
        case XML_CHAR_ENCODING_NONE:
1597
142k
      return(NULL);
1598
19.5k
        case XML_CHAR_ENCODING_UTF8:
1599
19.5k
      return(NULL);
1600
1.78k
        case XML_CHAR_ENCODING_UTF16LE:
1601
1.78k
      return((xmlCharEncodingHandlerPtr) xmlUTF16LEHandler);
1602
972
        case XML_CHAR_ENCODING_UTF16BE:
1603
972
      return((xmlCharEncodingHandlerPtr) xmlUTF16BEHandler);
1604
0
        case XML_CHAR_ENCODING_EBCDIC:
1605
0
            handler = xmlFindCharEncodingHandler("EBCDIC");
1606
0
            if (handler != NULL) return(handler);
1607
0
            handler = xmlFindCharEncodingHandler("ebcdic");
1608
0
            if (handler != NULL) return(handler);
1609
0
            handler = xmlFindCharEncodingHandler("EBCDIC-US");
1610
0
            if (handler != NULL) return(handler);
1611
0
            handler = xmlFindCharEncodingHandler("IBM-037");
1612
0
            if (handler != NULL) return(handler);
1613
0
      break;
1614
204
        case XML_CHAR_ENCODING_UCS4BE:
1615
204
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1616
204
            if (handler != NULL) return(handler);
1617
204
            handler = xmlFindCharEncodingHandler("UCS-4");
1618
204
            if (handler != NULL) return(handler);
1619
0
            handler = xmlFindCharEncodingHandler("UCS4");
1620
0
            if (handler != NULL) return(handler);
1621
0
      break;
1622
246
        case XML_CHAR_ENCODING_UCS4LE:
1623
246
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1624
246
            if (handler != NULL) return(handler);
1625
246
            handler = xmlFindCharEncodingHandler("UCS-4");
1626
246
            if (handler != NULL) return(handler);
1627
0
            handler = xmlFindCharEncodingHandler("UCS4");
1628
0
            if (handler != NULL) return(handler);
1629
0
      break;
1630
16
        case XML_CHAR_ENCODING_UCS4_2143:
1631
16
      break;
1632
36
        case XML_CHAR_ENCODING_UCS4_3412:
1633
36
      break;
1634
0
        case XML_CHAR_ENCODING_UCS2:
1635
0
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1636
0
            if (handler != NULL) return(handler);
1637
0
            handler = xmlFindCharEncodingHandler("UCS-2");
1638
0
            if (handler != NULL) return(handler);
1639
0
            handler = xmlFindCharEncodingHandler("UCS2");
1640
0
            if (handler != NULL) return(handler);
1641
0
      break;
1642
1643
      /*
1644
       * We used to keep ISO Latin encodings native in the
1645
       * generated data. This led to so many problems that
1646
       * this has been removed. One can still change this
1647
       * back by registering no-ops encoders for those
1648
       */
1649
0
        case XML_CHAR_ENCODING_8859_1:
1650
0
      handler = xmlFindCharEncodingHandler("ISO-8859-1");
1651
0
      if (handler != NULL) return(handler);
1652
0
      break;
1653
0
        case XML_CHAR_ENCODING_8859_2:
1654
0
      handler = xmlFindCharEncodingHandler("ISO-8859-2");
1655
0
      if (handler != NULL) return(handler);
1656
0
      break;
1657
0
        case XML_CHAR_ENCODING_8859_3:
1658
0
      handler = xmlFindCharEncodingHandler("ISO-8859-3");
1659
0
      if (handler != NULL) return(handler);
1660
0
      break;
1661
0
        case XML_CHAR_ENCODING_8859_4:
1662
0
      handler = xmlFindCharEncodingHandler("ISO-8859-4");
1663
0
      if (handler != NULL) return(handler);
1664
0
      break;
1665
0
        case XML_CHAR_ENCODING_8859_5:
1666
0
      handler = xmlFindCharEncodingHandler("ISO-8859-5");
1667
0
      if (handler != NULL) return(handler);
1668
0
      break;
1669
0
        case XML_CHAR_ENCODING_8859_6:
1670
0
      handler = xmlFindCharEncodingHandler("ISO-8859-6");
1671
0
      if (handler != NULL) return(handler);
1672
0
      break;
1673
0
        case XML_CHAR_ENCODING_8859_7:
1674
0
      handler = xmlFindCharEncodingHandler("ISO-8859-7");
1675
0
      if (handler != NULL) return(handler);
1676
0
      break;
1677
0
        case XML_CHAR_ENCODING_8859_8:
1678
0
      handler = xmlFindCharEncodingHandler("ISO-8859-8");
1679
0
      if (handler != NULL) return(handler);
1680
0
      break;
1681
0
        case XML_CHAR_ENCODING_8859_9:
1682
0
      handler = xmlFindCharEncodingHandler("ISO-8859-9");
1683
0
      if (handler != NULL) return(handler);
1684
0
      break;
1685
1686
1687
0
        case XML_CHAR_ENCODING_2022_JP:
1688
0
            handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1689
0
            if (handler != NULL) return(handler);
1690
0
      break;
1691
0
        case XML_CHAR_ENCODING_SHIFT_JIS:
1692
0
            handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1693
0
            if (handler != NULL) return(handler);
1694
0
            handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1695
0
            if (handler != NULL) return(handler);
1696
0
            handler = xmlFindCharEncodingHandler("Shift_JIS");
1697
0
            if (handler != NULL) return(handler);
1698
0
      break;
1699
0
        case XML_CHAR_ENCODING_EUC_JP:
1700
0
            handler = xmlFindCharEncodingHandler("EUC-JP");
1701
0
            if (handler != NULL) return(handler);
1702
0
      break;
1703
0
  default:
1704
0
      break;
1705
165k
    }
1706
1707
#ifdef DEBUG_ENCODING
1708
    xmlGenericError(xmlGenericErrorContext,
1709
      "No handler found for encoding %d\n", enc);
1710
#endif
1711
52
    return(NULL);
1712
165k
}
1713
1714
/**
1715
 * xmlFindCharEncodingHandler:
1716
 * @name:  a string describing the char encoding.
1717
 *
1718
 * Search in the registered set the handler able to read/write that encoding
1719
 * or create a new one.
1720
 *
1721
 * Returns the handler or NULL if not found
1722
 */
1723
xmlCharEncodingHandlerPtr
1724
11.4k
xmlFindCharEncodingHandler(const char *name) {
1725
11.4k
    const char *nalias;
1726
11.4k
    const char *norig;
1727
11.4k
    xmlCharEncoding alias;
1728
11.4k
#ifdef LIBXML_ICONV_ENABLED
1729
11.4k
    xmlCharEncodingHandlerPtr enc;
1730
11.4k
    iconv_t icv_in, icv_out;
1731
11.4k
#endif /* LIBXML_ICONV_ENABLED */
1732
#ifdef LIBXML_ICU_ENABLED
1733
    xmlCharEncodingHandlerPtr encu;
1734
    uconv_t *ucv_in, *ucv_out;
1735
#endif /* LIBXML_ICU_ENABLED */
1736
11.4k
    char upper[100];
1737
11.4k
    int i;
1738
1739
11.4k
    if (name == NULL) return(NULL);
1740
11.4k
    if (name[0] == 0) return(NULL);
1741
1742
    /*
1743
     * Do the alias resolution
1744
     */
1745
11.4k
    norig = name;
1746
11.4k
    nalias = xmlGetEncodingAlias(name);
1747
11.4k
    if (nalias != NULL)
1748
0
  name = nalias;
1749
1750
    /*
1751
     * Check first for directly registered encoding names
1752
     */
1753
122k
    for (i = 0;i < 99;i++) {
1754
121k
        upper[i] = toupper(name[i]);
1755
121k
  if (upper[i] == 0) break;
1756
121k
    }
1757
11.4k
    upper[i] = 0;
1758
1759
62.9k
    for (i = 0; i < (int) NUM_DEFAULT_HANDLERS; i++) {
1760
61.4k
        if (strcmp(upper, defaultHandlers[i].name) == 0)
1761
10.0k
            return((xmlCharEncodingHandlerPtr) &defaultHandlers[i]);
1762
61.4k
    }
1763
1764
1.46k
    if (handlers != NULL) {
1765
0
        for (i = 0;i < nbCharEncodingHandler; i++) {
1766
0
            if (!strcmp(upper, handlers[i]->name)) {
1767
#ifdef DEBUG_ENCODING
1768
                xmlGenericError(xmlGenericErrorContext,
1769
                        "Found registered handler for encoding %s\n", name);
1770
#endif
1771
0
                return(handlers[i]);
1772
0
            }
1773
0
        }
1774
0
    }
1775
1776
1.46k
#ifdef LIBXML_ICONV_ENABLED
1777
    /* check whether iconv can handle this */
1778
1.46k
    icv_in = iconv_open("UTF-8", name);
1779
1.46k
    icv_out = iconv_open(name, "UTF-8");
1780
1.46k
    if (icv_in == (iconv_t) -1) {
1781
603
        icv_in = iconv_open("UTF-8", upper);
1782
603
    }
1783
1.46k
    if (icv_out == (iconv_t) -1) {
1784
603
  icv_out = iconv_open(upper, "UTF-8");
1785
603
    }
1786
1.46k
    if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1787
861
      enc = (xmlCharEncodingHandlerPtr)
1788
861
            xmlMalloc(sizeof(xmlCharEncodingHandler));
1789
861
      if (enc == NULL) {
1790
0
          iconv_close(icv_in);
1791
0
          iconv_close(icv_out);
1792
0
    return(NULL);
1793
0
      }
1794
861
            memset(enc, 0, sizeof(xmlCharEncodingHandler));
1795
861
      enc->name = xmlMemStrdup(name);
1796
861
      enc->input = NULL;
1797
861
      enc->output = NULL;
1798
861
      enc->iconv_in = icv_in;
1799
861
      enc->iconv_out = icv_out;
1800
#ifdef DEBUG_ENCODING
1801
            xmlGenericError(xmlGenericErrorContext,
1802
        "Found iconv handler for encoding %s\n", name);
1803
#endif
1804
861
      return enc;
1805
861
    } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1806
0
      xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1807
0
        "iconv : problems with filters for '%s'\n", name);
1808
0
      if (icv_in != (iconv_t) -1)
1809
0
    iconv_close(icv_in);
1810
0
      else
1811
0
    iconv_close(icv_out);
1812
0
    }
1813
603
#endif /* LIBXML_ICONV_ENABLED */
1814
#ifdef LIBXML_ICU_ENABLED
1815
    /* check whether icu can handle this */
1816
    ucv_in = openIcuConverter(name, 1);
1817
    ucv_out = openIcuConverter(name, 0);
1818
    if (ucv_in != NULL && ucv_out != NULL) {
1819
      encu = (xmlCharEncodingHandlerPtr)
1820
             xmlMalloc(sizeof(xmlCharEncodingHandler));
1821
      if (encu == NULL) {
1822
                closeIcuConverter(ucv_in);
1823
                closeIcuConverter(ucv_out);
1824
    return(NULL);
1825
      }
1826
            memset(encu, 0, sizeof(xmlCharEncodingHandler));
1827
      encu->name = xmlMemStrdup(name);
1828
      encu->input = NULL;
1829
      encu->output = NULL;
1830
      encu->uconv_in = ucv_in;
1831
      encu->uconv_out = ucv_out;
1832
#ifdef DEBUG_ENCODING
1833
            xmlGenericError(xmlGenericErrorContext,
1834
        "Found ICU converter handler for encoding %s\n", name);
1835
#endif
1836
      return encu;
1837
    } else if (ucv_in != NULL || ucv_out != NULL) {
1838
            closeIcuConverter(ucv_in);
1839
            closeIcuConverter(ucv_out);
1840
      xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1841
        "ICU converter : problems with filters for '%s'\n", name);
1842
    }
1843
#endif /* LIBXML_ICU_ENABLED */
1844
1845
#ifdef DEBUG_ENCODING
1846
    xmlGenericError(xmlGenericErrorContext,
1847
      "No handler found for encoding %s\n", name);
1848
#endif
1849
1850
    /*
1851
     * Fallback using the canonical names
1852
     */
1853
603
    alias = xmlParseCharEncoding(norig);
1854
603
    if (alias != XML_CHAR_ENCODING_ERROR) {
1855
450
        const char* canon;
1856
450
        canon = xmlGetCharEncodingName(alias);
1857
450
        if ((canon != NULL) && (strcmp(name, canon))) {
1858
0
      return(xmlFindCharEncodingHandler(canon));
1859
0
        }
1860
450
    }
1861
1862
    /* If "none of the above", give up */
1863
603
    return(NULL);
1864
603
}
1865
1866
/************************************************************************
1867
 *                  *
1868
 *    ICONV based generic conversion functions    *
1869
 *                  *
1870
 ************************************************************************/
1871
1872
#ifdef LIBXML_ICONV_ENABLED
1873
/**
1874
 * xmlIconvWrapper:
1875
 * @cd:   iconv converter data structure
1876
 * @out:  a pointer to an array of bytes to store the result
1877
 * @outlen:  the length of @out
1878
 * @in:  a pointer to an array of input bytes
1879
 * @inlen:  the length of @in
1880
 *
1881
 * Returns 0 if success, or
1882
 *     -1 by lack of space, or
1883
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
1884
 *        the result of transformation can't fit into the encoding we want), or
1885
 *     -3 if there the last byte can't form a single output char.
1886
 *
1887
 * The value of @inlen after return is the number of octets consumed
1888
 *     as the return value is positive, else unpredictable.
1889
 * The value of @outlen after return is the number of octets produced.
1890
 */
1891
static int
1892
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
1893
6.08k
                const unsigned char *in, int *inlen) {
1894
6.08k
    size_t icv_inlen, icv_outlen;
1895
6.08k
    const char *icv_in = (const char *) in;
1896
6.08k
    char *icv_out = (char *) out;
1897
6.08k
    size_t ret;
1898
1899
6.08k
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1900
63
        if (outlen != NULL) *outlen = 0;
1901
63
        return(-1);
1902
63
    }
1903
6.02k
    icv_inlen = *inlen;
1904
6.02k
    icv_outlen = *outlen;
1905
    /*
1906
     * Some versions take const, other versions take non-const input.
1907
     */
1908
6.02k
    ret = iconv(cd, (void *) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
1909
6.02k
    *inlen -= icv_inlen;
1910
6.02k
    *outlen -= icv_outlen;
1911
6.02k
    if ((icv_inlen != 0) || (ret == (size_t) -1)) {
1912
1.23k
#ifdef EILSEQ
1913
1.23k
        if (errno == EILSEQ) {
1914
736
            return -2;
1915
736
        } else
1916
501
#endif
1917
501
#ifdef E2BIG
1918
501
        if (errno == E2BIG) {
1919
12
            return -1;
1920
12
        } else
1921
489
#endif
1922
489
#ifdef EINVAL
1923
489
        if (errno == EINVAL) {
1924
489
            return -3;
1925
489
        } else
1926
0
#endif
1927
0
        {
1928
0
            return -3;
1929
0
        }
1930
1.23k
    }
1931
4.78k
    return 0;
1932
6.02k
}
1933
#endif /* LIBXML_ICONV_ENABLED */
1934
1935
/************************************************************************
1936
 *                  *
1937
 *    ICU based generic conversion functions    *
1938
 *                  *
1939
 ************************************************************************/
1940
1941
#ifdef LIBXML_ICU_ENABLED
1942
/**
1943
 * xmlUconvWrapper:
1944
 * @cd: ICU uconverter data structure
1945
 * @toUnicode : non-zero if toUnicode. 0 otherwise.
1946
 * @out:  a pointer to an array of bytes to store the result
1947
 * @outlen:  the length of @out
1948
 * @in:  a pointer to an array of input bytes
1949
 * @inlen:  the length of @in
1950
 * @flush: if true, indicates end of input
1951
 *
1952
 * Returns 0 if success, or
1953
 *     -1 by lack of space, or
1954
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
1955
 *        the result of transformation can't fit into the encoding we want), or
1956
 *     -3 if there the last byte can't form a single output char.
1957
 *
1958
 * The value of @inlen after return is the number of octets consumed
1959
 *     as the return value is positive, else unpredictable.
1960
 * The value of @outlen after return is the number of octets produced.
1961
 */
1962
static int
1963
xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
1964
                const unsigned char *in, int *inlen, int flush) {
1965
    const char *ucv_in = (const char *) in;
1966
    char *ucv_out = (char *) out;
1967
    UErrorCode err = U_ZERO_ERROR;
1968
1969
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1970
        if (outlen != NULL) *outlen = 0;
1971
        return(-1);
1972
    }
1973
1974
    if (toUnicode) {
1975
        /* encoding => UTF-16 => UTF-8 */
1976
        ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
1977
                       &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1978
                       &cd->pivot_source, &cd->pivot_target,
1979
                       cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1980
    } else {
1981
        /* UTF-8 => UTF-16 => encoding */
1982
        ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
1983
                       &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1984
                       &cd->pivot_source, &cd->pivot_target,
1985
                       cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1986
    }
1987
    *inlen = ucv_in - (const char*) in;
1988
    *outlen = ucv_out - (char *) out;
1989
    if (U_SUCCESS(err)) {
1990
        /* reset pivot buf if this is the last call for input (flush==TRUE) */
1991
        if (flush)
1992
            cd->pivot_source = cd->pivot_target = cd->pivot_buf;
1993
        return 0;
1994
    }
1995
    if (err == U_BUFFER_OVERFLOW_ERROR)
1996
        return -1;
1997
    if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
1998
        return -2;
1999
    return -3;
2000
}
2001
#endif /* LIBXML_ICU_ENABLED */
2002
2003
/************************************************************************
2004
 *                  *
2005
 *    The real API used by libxml for on-the-fly conversion *
2006
 *                  *
2007
 ************************************************************************/
2008
2009
/**
2010
 * xmlEncInputChunk:
2011
 * @handler:  encoding handler
2012
 * @out:  a pointer to an array of bytes to store the result
2013
 * @outlen:  the length of @out
2014
 * @in:  a pointer to an array of input bytes
2015
 * @inlen:  the length of @in
2016
 * @flush:  flush (ICU-related)
2017
 *
2018
 * Returns 0 if success, or
2019
 *     -1 by lack of space, or
2020
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2021
 *        the result of transformation can't fit into the encoding we want), or
2022
 *     -3 if there the last byte can't form a single output char.
2023
 *
2024
 * The value of @inlen after return is the number of octets consumed
2025
 *     as the return value is 0, else unpredictable.
2026
 * The value of @outlen after return is the number of octets produced.
2027
 */
2028
static int
2029
xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2030
392k
                 int *outlen, const unsigned char *in, int *inlen, int flush) {
2031
392k
    int ret;
2032
392k
    (void)flush;
2033
2034
392k
    if (handler->input != NULL) {
2035
386k
        ret = handler->input(out, outlen, in, inlen);
2036
386k
        if (ret > 0)
2037
342k
           ret = 0;
2038
386k
    }
2039
5.94k
#ifdef LIBXML_ICONV_ENABLED
2040
5.94k
    else if (handler->iconv_in != NULL) {
2041
5.94k
        ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
2042
5.94k
    }
2043
0
#endif /* LIBXML_ICONV_ENABLED */
2044
#ifdef LIBXML_ICU_ENABLED
2045
    else if (handler->uconv_in != NULL) {
2046
        ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen,
2047
                              flush);
2048
    }
2049
#endif /* LIBXML_ICU_ENABLED */
2050
0
    else {
2051
0
        *outlen = 0;
2052
0
        *inlen = 0;
2053
0
        ret = -2;
2054
0
    }
2055
2056
392k
    return(ret);
2057
392k
}
2058
2059
/**
2060
 * xmlEncOutputChunk:
2061
 * @handler:  encoding handler
2062
 * @out:  a pointer to an array of bytes to store the result
2063
 * @outlen:  the length of @out
2064
 * @in:  a pointer to an array of input bytes
2065
 * @inlen:  the length of @in
2066
 *
2067
 * Returns 0 if success, or
2068
 *     -1 by lack of space, or
2069
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2070
 *        the result of transformation can't fit into the encoding we want), or
2071
 *     -3 if there the last byte can't form a single output char.
2072
 *     -4 if no output function was found.
2073
 *
2074
 * The value of @inlen after return is the number of octets consumed
2075
 *     as the return value is 0, else unpredictable.
2076
 * The value of @outlen after return is the number of octets produced.
2077
 */
2078
static int
2079
xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2080
9.18k
                  int *outlen, const unsigned char *in, int *inlen) {
2081
9.18k
    int ret;
2082
2083
9.18k
    if (handler->output != NULL) {
2084
9.04k
        ret = handler->output(out, outlen, in, inlen);
2085
9.04k
        if (ret > 0)
2086
7.06k
           ret = 0;
2087
9.04k
    }
2088
147
#ifdef LIBXML_ICONV_ENABLED
2089
147
    else if (handler->iconv_out != NULL) {
2090
147
        ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
2091
147
    }
2092
0
#endif /* LIBXML_ICONV_ENABLED */
2093
#ifdef LIBXML_ICU_ENABLED
2094
    else if (handler->uconv_out != NULL) {
2095
        ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen,
2096
                              1);
2097
    }
2098
#endif /* LIBXML_ICU_ENABLED */
2099
0
    else {
2100
0
        *outlen = 0;
2101
0
        *inlen = 0;
2102
0
        ret = -4;
2103
0
    }
2104
2105
9.18k
    return(ret);
2106
9.18k
}
2107
2108
/**
2109
 * xmlCharEncFirstLine:
2110
 * @handler:  char encoding transformation data structure
2111
 * @out:  an xmlBuffer for the output.
2112
 * @in:  an xmlBuffer for the input
2113
 *
2114
 * Front-end for the encoding handler input function, but handle only
2115
 * the very first line, i.e. limit itself to 45 chars.
2116
 *
2117
 * Returns the number of byte written if success, or
2118
 *     -1 general error
2119
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2120
 *        the result of transformation can't fit into the encoding we want), or
2121
 */
2122
int
2123
xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2124
0
                    xmlBufferPtr in) {
2125
0
    int ret;
2126
0
    int written;
2127
0
    int toconv;
2128
2129
0
    if (handler == NULL) return(-1);
2130
0
    if (out == NULL) return(-1);
2131
0
    if (in == NULL) return(-1);
2132
2133
    /* calculate space available */
2134
0
    written = out->size - out->use - 1; /* count '\0' */
2135
0
    toconv = in->use;
2136
    /*
2137
     * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2138
     * 45 chars should be sufficient to reach the end of the encoding
2139
     * declaration without going too far inside the document content.
2140
     * on UTF-16 this means 90bytes, on UCS4 this means 180
2141
     * The actual value depending on guessed encoding is passed as @len
2142
     * if provided
2143
     */
2144
0
    if (toconv > 180)
2145
0
        toconv = 180;
2146
0
    if (toconv * 2 >= written) {
2147
0
        xmlBufferGrow(out, toconv * 2);
2148
0
  written = out->size - out->use - 1;
2149
0
    }
2150
2151
0
    ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2152
0
                           in->content, &toconv, 0);
2153
0
    xmlBufferShrink(in, toconv);
2154
0
    out->use += written;
2155
0
    out->content[out->use] = 0;
2156
0
    if (ret == -1) ret = -3;
2157
2158
#ifdef DEBUG_ENCODING
2159
    switch (ret) {
2160
        case 0:
2161
      xmlGenericError(xmlGenericErrorContext,
2162
        "converted %d bytes to %d bytes of input\n",
2163
              toconv, written);
2164
      break;
2165
        case -1:
2166
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2167
              toconv, written, in->use);
2168
      break;
2169
        case -2:
2170
      xmlGenericError(xmlGenericErrorContext,
2171
        "input conversion failed due to input error\n");
2172
      break;
2173
        case -3:
2174
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2175
              toconv, written, in->use);
2176
      break;
2177
  default:
2178
      xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2179
    }
2180
#endif /* DEBUG_ENCODING */
2181
    /*
2182
     * Ignore when input buffer is not on a boundary
2183
     */
2184
0
    if (ret == -3) ret = 0;
2185
0
    if (ret == -1) ret = 0;
2186
0
    return(written ? written : ret);
2187
0
}
2188
2189
/**
2190
 * xmlCharEncFirstLineInput:
2191
 * @input: a parser input buffer
2192
 * @len:  number of bytes to convert for the first line, or -1
2193
 *
2194
 * Front-end for the encoding handler input function, but handle only
2195
 * the very first line. Point is that this is based on autodetection
2196
 * of the encoding and once that first line is converted we may find
2197
 * out that a different decoder is needed to process the input.
2198
 *
2199
 * Returns the number of byte written if success, or
2200
 *     -1 general error
2201
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2202
 *        the result of transformation can't fit into the encoding we want), or
2203
 */
2204
int
2205
xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
2206
10.0k
{
2207
10.0k
    int ret;
2208
10.0k
    size_t written;
2209
10.0k
    size_t toconv;
2210
10.0k
    int c_in;
2211
10.0k
    int c_out;
2212
10.0k
    xmlBufPtr in;
2213
10.0k
    xmlBufPtr out;
2214
2215
10.0k
    if ((input == NULL) || (input->encoder == NULL) ||
2216
10.0k
        (input->buffer == NULL) || (input->raw == NULL))
2217
0
        return (-1);
2218
10.0k
    out = input->buffer;
2219
10.0k
    in = input->raw;
2220
2221
10.0k
    toconv = xmlBufUse(in);
2222
10.0k
    if (toconv == 0)
2223
0
        return (0);
2224
10.0k
    written = xmlBufAvail(out);
2225
    /*
2226
     * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2227
     * 45 chars should be sufficient to reach the end of the encoding
2228
     * declaration without going too far inside the document content.
2229
     * on UTF-16 this means 90bytes, on UCS4 this means 180
2230
     * The actual value depending on guessed encoding is passed as @len
2231
     * if provided
2232
     */
2233
10.0k
    if (len >= 0) {
2234
1.62k
        if (toconv > (unsigned int) len)
2235
1.22k
            toconv = len;
2236
8.42k
    } else {
2237
8.42k
        if (toconv > 180)
2238
5.64k
            toconv = 180;
2239
8.42k
    }
2240
10.0k
    if (toconv * 2 >= written) {
2241
0
        xmlBufGrow(out, toconv * 2);
2242
0
        written = xmlBufAvail(out);
2243
0
    }
2244
10.0k
    if (written > 360)
2245
10.0k
        written = 360;
2246
2247
10.0k
    c_in = toconv;
2248
10.0k
    c_out = written;
2249
10.0k
    ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2250
10.0k
                           xmlBufContent(in), &c_in, 0);
2251
10.0k
    xmlBufShrink(in, c_in);
2252
10.0k
    xmlBufAddLen(out, c_out);
2253
10.0k
    if (ret == -1)
2254
210
        ret = -3;
2255
2256
10.0k
    switch (ret) {
2257
9.53k
        case 0:
2258
#ifdef DEBUG_ENCODING
2259
            xmlGenericError(xmlGenericErrorContext,
2260
                            "converted %d bytes to %d bytes of input\n",
2261
                            c_in, c_out);
2262
#endif
2263
9.53k
            break;
2264
0
        case -1:
2265
#ifdef DEBUG_ENCODING
2266
            xmlGenericError(xmlGenericErrorContext,
2267
                         "converted %d bytes to %d bytes of input, %d left\n",
2268
                            c_in, c_out, (int)xmlBufUse(in));
2269
#endif
2270
0
            break;
2271
254
        case -3:
2272
#ifdef DEBUG_ENCODING
2273
            xmlGenericError(xmlGenericErrorContext,
2274
                        "converted %d bytes to %d bytes of input, %d left\n",
2275
                            c_in, c_out, (int)xmlBufUse(in));
2276
#endif
2277
254
            break;
2278
254
        case -2: {
2279
254
            char buf[50];
2280
254
            const xmlChar *content = xmlBufContent(in);
2281
2282
254
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2283
254
         content[0], content[1],
2284
254
         content[2], content[3]);
2285
254
      buf[49] = 0;
2286
254
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2287
254
        "input conversion failed due to input error, bytes %s\n",
2288
254
               buf);
2289
254
        }
2290
10.0k
    }
2291
    /*
2292
     * Ignore when input buffer is not on a boundary
2293
     */
2294
10.0k
    if (ret == -3) ret = 0;
2295
10.0k
    if (ret == -1) ret = 0;
2296
10.0k
    return(c_out ? c_out : ret);
2297
10.0k
}
2298
2299
/**
2300
 * xmlCharEncInput:
2301
 * @input: a parser input buffer
2302
 * @flush: try to flush all the raw buffer
2303
 *
2304
 * Generic front-end for the encoding handler on parser input
2305
 *
2306
 * Returns the number of byte written if success, or
2307
 *     -1 general error
2308
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2309
 *        the result of transformation can't fit into the encoding we want), or
2310
 */
2311
int
2312
xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
2313
511k
{
2314
511k
    int ret;
2315
511k
    size_t written;
2316
511k
    size_t toconv;
2317
511k
    int c_in;
2318
511k
    int c_out;
2319
511k
    xmlBufPtr in;
2320
511k
    xmlBufPtr out;
2321
2322
511k
    if ((input == NULL) || (input->encoder == NULL) ||
2323
511k
        (input->buffer == NULL) || (input->raw == NULL))
2324
0
        return (-1);
2325
511k
    out = input->buffer;
2326
511k
    in = input->raw;
2327
2328
511k
    toconv = xmlBufUse(in);
2329
511k
    if (toconv == 0)
2330
129k
        return (0);
2331
382k
    if ((toconv > 64 * 1024) && (flush == 0))
2332
0
        toconv = 64 * 1024;
2333
382k
    written = xmlBufAvail(out);
2334
382k
    if (toconv * 2 >= written) {
2335
5.41k
        if (xmlBufGrow(out, toconv * 2) < 0)
2336
0
            return (-1);
2337
5.41k
        written = xmlBufAvail(out);
2338
5.41k
    }
2339
382k
    if ((written > 128 * 1024) && (flush == 0))
2340
0
        written = 128 * 1024;
2341
2342
382k
    c_in = toconv;
2343
382k
    c_out = written;
2344
382k
    ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2345
382k
                           xmlBufContent(in), &c_in, flush);
2346
382k
    xmlBufShrink(in, c_in);
2347
382k
    xmlBufAddLen(out, c_out);
2348
382k
    if (ret == -1)
2349
36.8k
        ret = -3;
2350
2351
382k
    switch (ret) {
2352
343k
        case 0:
2353
#ifdef DEBUG_ENCODING
2354
            xmlGenericError(xmlGenericErrorContext,
2355
                            "converted %d bytes to %d bytes of input\n",
2356
                            c_in, c_out);
2357
#endif
2358
343k
            break;
2359
0
        case -1:
2360
#ifdef DEBUG_ENCODING
2361
            xmlGenericError(xmlGenericErrorContext,
2362
                         "converted %d bytes to %d bytes of input, %d left\n",
2363
                            c_in, c_out, (int)xmlBufUse(in));
2364
#endif
2365
0
            break;
2366
37.3k
        case -3:
2367
#ifdef DEBUG_ENCODING
2368
            xmlGenericError(xmlGenericErrorContext,
2369
                        "converted %d bytes to %d bytes of input, %d left\n",
2370
                            c_in, c_out, (int)xmlBufUse(in));
2371
#endif
2372
37.3k
            break;
2373
1.22k
        case -2: {
2374
1.22k
            char buf[50];
2375
1.22k
            const xmlChar *content = xmlBufContent(in);
2376
2377
1.22k
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2378
1.22k
         content[0], content[1],
2379
1.22k
         content[2], content[3]);
2380
1.22k
      buf[49] = 0;
2381
1.22k
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2382
1.22k
        "input conversion failed due to input error, bytes %s\n",
2383
1.22k
               buf);
2384
1.22k
        }
2385
382k
    }
2386
    /*
2387
     * Ignore when input buffer is not on a boundary
2388
     */
2389
382k
    if (ret == -3)
2390
37.3k
        ret = 0;
2391
382k
    return (c_out? c_out : ret);
2392
382k
}
2393
2394
/**
2395
 * xmlCharEncInFunc:
2396
 * @handler:  char encoding transformation data structure
2397
 * @out:  an xmlBuffer for the output.
2398
 * @in:  an xmlBuffer for the input
2399
 *
2400
 * Generic front-end for the encoding handler input function
2401
 *
2402
 * Returns the number of byte written if success, or
2403
 *     -1 general error
2404
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2405
 *        the result of transformation can't fit into the encoding we want), or
2406
 */
2407
int
2408
xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2409
                 xmlBufferPtr in)
2410
0
{
2411
0
    int ret;
2412
0
    int written;
2413
0
    int toconv;
2414
2415
0
    if (handler == NULL)
2416
0
        return (-1);
2417
0
    if (out == NULL)
2418
0
        return (-1);
2419
0
    if (in == NULL)
2420
0
        return (-1);
2421
2422
0
    toconv = in->use;
2423
0
    if (toconv == 0)
2424
0
        return (0);
2425
0
    written = out->size - out->use -1; /* count '\0' */
2426
0
    if (toconv * 2 >= written) {
2427
0
        xmlBufferGrow(out, out->size + toconv * 2);
2428
0
        written = out->size - out->use - 1;
2429
0
    }
2430
0
    ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2431
0
                           in->content, &toconv, 1);
2432
0
    xmlBufferShrink(in, toconv);
2433
0
    out->use += written;
2434
0
    out->content[out->use] = 0;
2435
0
    if (ret == -1)
2436
0
        ret = -3;
2437
2438
0
    switch (ret) {
2439
0
        case 0:
2440
#ifdef DEBUG_ENCODING
2441
            xmlGenericError(xmlGenericErrorContext,
2442
                            "converted %d bytes to %d bytes of input\n",
2443
                            toconv, written);
2444
#endif
2445
0
            break;
2446
0
        case -1:
2447
#ifdef DEBUG_ENCODING
2448
            xmlGenericError(xmlGenericErrorContext,
2449
                         "converted %d bytes to %d bytes of input, %d left\n",
2450
                            toconv, written, in->use);
2451
#endif
2452
0
            break;
2453
0
        case -3:
2454
#ifdef DEBUG_ENCODING
2455
            xmlGenericError(xmlGenericErrorContext,
2456
                        "converted %d bytes to %d bytes of input, %d left\n",
2457
                            toconv, written, in->use);
2458
#endif
2459
0
            break;
2460
0
        case -2: {
2461
0
            char buf[50];
2462
2463
0
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2464
0
         in->content[0], in->content[1],
2465
0
         in->content[2], in->content[3]);
2466
0
      buf[49] = 0;
2467
0
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2468
0
        "input conversion failed due to input error, bytes %s\n",
2469
0
               buf);
2470
0
        }
2471
0
    }
2472
    /*
2473
     * Ignore when input buffer is not on a boundary
2474
     */
2475
0
    if (ret == -3)
2476
0
        ret = 0;
2477
0
    return (written? written : ret);
2478
0
}
2479
2480
#ifdef LIBXML_OUTPUT_ENABLED
2481
/**
2482
 * xmlCharEncOutput:
2483
 * @output: a parser output buffer
2484
 * @init: is this an initialization call without data
2485
 *
2486
 * Generic front-end for the encoding handler on parser output
2487
 * a first call with @init == 1 has to be made first to initiate the
2488
 * output in case of non-stateless encoding needing to initiate their
2489
 * state or the output (like the BOM in UTF16).
2490
 * In case of UTF8 sequence conversion errors for the given encoder,
2491
 * the content will be automatically remapped to a CharRef sequence.
2492
 *
2493
 * Returns the number of byte written if success, or
2494
 *     -1 general error
2495
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2496
 *        the result of transformation can't fit into the encoding we want), or
2497
 */
2498
int
2499
xmlCharEncOutput(xmlOutputBufferPtr output, int init)
2500
11.1k
{
2501
11.1k
    int ret;
2502
11.1k
    size_t written;
2503
11.1k
    int writtentot = 0;
2504
11.1k
    size_t toconv;
2505
11.1k
    int c_in;
2506
11.1k
    int c_out;
2507
11.1k
    xmlBufPtr in;
2508
11.1k
    xmlBufPtr out;
2509
2510
11.1k
    if ((output == NULL) || (output->encoder == NULL) ||
2511
11.1k
        (output->buffer == NULL) || (output->conv == NULL))
2512
0
        return (-1);
2513
11.1k
    out = output->conv;
2514
11.1k
    in = output->buffer;
2515
2516
11.1k
retry:
2517
2518
11.1k
    written = xmlBufAvail(out);
2519
2520
    /*
2521
     * First specific handling of the initialization call
2522
     */
2523
11.1k
    if (init) {
2524
2.02k
        c_in = 0;
2525
2.02k
        c_out = written;
2526
        /* TODO: Check return value. */
2527
2.02k
        xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2528
2.02k
                          NULL, &c_in);
2529
2.02k
        xmlBufAddLen(out, c_out);
2530
#ifdef DEBUG_ENCODING
2531
  xmlGenericError(xmlGenericErrorContext,
2532
    "initialized encoder\n");
2533
#endif
2534
2.02k
        return(c_out);
2535
2.02k
    }
2536
2537
    /*
2538
     * Conversion itself.
2539
     */
2540
9.17k
    toconv = xmlBufUse(in);
2541
9.17k
    if (toconv == 0)
2542
2.02k
        return (writtentot);
2543
7.15k
    if (toconv > 64 * 1024)
2544
2
        toconv = 64 * 1024;
2545
7.15k
    if (toconv * 4 >= written) {
2546
1.94k
        xmlBufGrow(out, toconv * 4);
2547
1.94k
        written = xmlBufAvail(out);
2548
1.94k
    }
2549
7.15k
    if (written > 256 * 1024)
2550
4
        written = 256 * 1024;
2551
2552
7.15k
    c_in = toconv;
2553
7.15k
    c_out = written;
2554
7.15k
    ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2555
7.15k
                            xmlBufContent(in), &c_in);
2556
7.15k
    xmlBufShrink(in, c_in);
2557
7.15k
    xmlBufAddLen(out, c_out);
2558
7.15k
    writtentot += c_out;
2559
7.15k
    if (ret == -1) {
2560
0
        if (c_out > 0) {
2561
            /* Can be a limitation of iconv or uconv */
2562
0
            goto retry;
2563
0
        }
2564
0
        ret = -3;
2565
0
    }
2566
2567
    /*
2568
     * Attempt to handle error cases
2569
     */
2570
7.15k
    switch (ret) {
2571
7.14k
        case 0:
2572
#ifdef DEBUG_ENCODING
2573
      xmlGenericError(xmlGenericErrorContext,
2574
        "converted %d bytes to %d bytes of output\n",
2575
              c_in, c_out);
2576
#endif
2577
7.14k
      break;
2578
0
        case -1:
2579
#ifdef DEBUG_ENCODING
2580
      xmlGenericError(xmlGenericErrorContext,
2581
        "output conversion failed by lack of space\n");
2582
#endif
2583
0
      break;
2584
0
        case -3:
2585
#ifdef DEBUG_ENCODING
2586
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2587
              c_in, c_out, (int) xmlBufUse(in));
2588
#endif
2589
0
      break;
2590
0
        case -4:
2591
0
            xmlEncodingErr(XML_I18N_NO_OUTPUT,
2592
0
                           "xmlCharEncOutFunc: no output function !\n", NULL);
2593
0
            ret = -1;
2594
0
            break;
2595
12
        case -2: {
2596
12
      xmlChar charref[20];
2597
12
      int len = xmlBufUse(in);
2598
12
            xmlChar *content = xmlBufContent(in);
2599
12
      int cur, charrefLen;
2600
2601
12
      cur = xmlGetUTF8Char(content, &len);
2602
12
      if (cur <= 0)
2603
0
                break;
2604
2605
#ifdef DEBUG_ENCODING
2606
            xmlGenericError(xmlGenericErrorContext,
2607
                    "handling output conversion error\n");
2608
            xmlGenericError(xmlGenericErrorContext,
2609
                    "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2610
                    content[0], content[1],
2611
                    content[2], content[3]);
2612
#endif
2613
            /*
2614
             * Removes the UTF8 sequence, and replace it by a charref
2615
             * and continue the transcoding phase, hoping the error
2616
             * did not mangle the encoder state.
2617
             */
2618
12
            charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2619
12
                             "&#%d;", cur);
2620
12
            xmlBufShrink(in, len);
2621
12
            xmlBufGrow(out, charrefLen * 4);
2622
12
            c_out = xmlBufAvail(out);
2623
12
            c_in = charrefLen;
2624
12
            ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2625
12
                                    charref, &c_in);
2626
2627
12
      if ((ret < 0) || (c_in != charrefLen)) {
2628
0
    char buf[50];
2629
2630
0
    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2631
0
       content[0], content[1],
2632
0
       content[2], content[3]);
2633
0
    buf[49] = 0;
2634
0
    xmlEncodingErr(XML_I18N_CONV_FAILED,
2635
0
        "output conversion failed due to conv error, bytes %s\n",
2636
0
             buf);
2637
0
    content[0] = ' ';
2638
0
                break;
2639
0
      }
2640
2641
12
            xmlBufAddLen(out, c_out);
2642
12
            writtentot += c_out;
2643
12
            goto retry;
2644
12
  }
2645
7.15k
    }
2646
7.14k
    return(writtentot ? writtentot : ret);
2647
7.15k
}
2648
#endif
2649
2650
/**
2651
 * xmlCharEncOutFunc:
2652
 * @handler:  char encoding transformation data structure
2653
 * @out:  an xmlBuffer for the output.
2654
 * @in:  an xmlBuffer for the input
2655
 *
2656
 * Generic front-end for the encoding handler output function
2657
 * a first call with @in == NULL has to be made firs to initiate the
2658
 * output in case of non-stateless encoding needing to initiate their
2659
 * state or the output (like the BOM in UTF16).
2660
 * In case of UTF8 sequence conversion errors for the given encoder,
2661
 * the content will be automatically remapped to a CharRef sequence.
2662
 *
2663
 * Returns the number of byte written if success, or
2664
 *     -1 general error
2665
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2666
 *        the result of transformation can't fit into the encoding we want), or
2667
 */
2668
int
2669
xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2670
0
                  xmlBufferPtr in) {
2671
0
    int ret;
2672
0
    int written;
2673
0
    int writtentot = 0;
2674
0
    int toconv;
2675
2676
0
    if (handler == NULL) return(-1);
2677
0
    if (out == NULL) return(-1);
2678
2679
0
retry:
2680
2681
0
    written = out->size - out->use;
2682
2683
0
    if (written > 0)
2684
0
  written--; /* Gennady: count '/0' */
2685
2686
    /*
2687
     * First specific handling of in = NULL, i.e. the initialization call
2688
     */
2689
0
    if (in == NULL) {
2690
0
        toconv = 0;
2691
        /* TODO: Check return value. */
2692
0
        xmlEncOutputChunk(handler, &out->content[out->use], &written,
2693
0
                          NULL, &toconv);
2694
0
        out->use += written;
2695
0
        out->content[out->use] = 0;
2696
#ifdef DEBUG_ENCODING
2697
  xmlGenericError(xmlGenericErrorContext,
2698
    "initialized encoder\n");
2699
#endif
2700
0
        return(0);
2701
0
    }
2702
2703
    /*
2704
     * Conversion itself.
2705
     */
2706
0
    toconv = in->use;
2707
0
    if (toconv == 0)
2708
0
  return(0);
2709
0
    if (toconv * 4 >= written) {
2710
0
        xmlBufferGrow(out, toconv * 4);
2711
0
  written = out->size - out->use - 1;
2712
0
    }
2713
0
    ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2714
0
                            in->content, &toconv);
2715
0
    xmlBufferShrink(in, toconv);
2716
0
    out->use += written;
2717
0
    writtentot += written;
2718
0
    out->content[out->use] = 0;
2719
0
    if (ret == -1) {
2720
0
        if (written > 0) {
2721
            /* Can be a limitation of iconv or uconv */
2722
0
            goto retry;
2723
0
        }
2724
0
        ret = -3;
2725
0
    }
2726
2727
    /*
2728
     * Attempt to handle error cases
2729
     */
2730
0
    switch (ret) {
2731
0
        case 0:
2732
#ifdef DEBUG_ENCODING
2733
      xmlGenericError(xmlGenericErrorContext,
2734
        "converted %d bytes to %d bytes of output\n",
2735
              toconv, written);
2736
#endif
2737
0
      break;
2738
0
        case -1:
2739
#ifdef DEBUG_ENCODING
2740
      xmlGenericError(xmlGenericErrorContext,
2741
        "output conversion failed by lack of space\n");
2742
#endif
2743
0
      break;
2744
0
        case -3:
2745
#ifdef DEBUG_ENCODING
2746
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2747
              toconv, written, in->use);
2748
#endif
2749
0
      break;
2750
0
        case -4:
2751
0
      xmlEncodingErr(XML_I18N_NO_OUTPUT,
2752
0
               "xmlCharEncOutFunc: no output function !\n", NULL);
2753
0
      ret = -1;
2754
0
            break;
2755
0
        case -2: {
2756
0
      xmlChar charref[20];
2757
0
      int len = in->use;
2758
0
      const xmlChar *utf = (const xmlChar *) in->content;
2759
0
      int cur, charrefLen;
2760
2761
0
      cur = xmlGetUTF8Char(utf, &len);
2762
0
      if (cur <= 0)
2763
0
                break;
2764
2765
#ifdef DEBUG_ENCODING
2766
            xmlGenericError(xmlGenericErrorContext,
2767
                    "handling output conversion error\n");
2768
            xmlGenericError(xmlGenericErrorContext,
2769
                    "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2770
                    in->content[0], in->content[1],
2771
                    in->content[2], in->content[3]);
2772
#endif
2773
            /*
2774
             * Removes the UTF8 sequence, and replace it by a charref
2775
             * and continue the transcoding phase, hoping the error
2776
             * did not mangle the encoder state.
2777
             */
2778
0
            charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2779
0
                             "&#%d;", cur);
2780
0
            xmlBufferShrink(in, len);
2781
0
            xmlBufferGrow(out, charrefLen * 4);
2782
0
      written = out->size - out->use - 1;
2783
0
            toconv = charrefLen;
2784
0
            ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2785
0
                                    charref, &toconv);
2786
2787
0
      if ((ret < 0) || (toconv != charrefLen)) {
2788
0
    char buf[50];
2789
2790
0
    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2791
0
       in->content[0], in->content[1],
2792
0
       in->content[2], in->content[3]);
2793
0
    buf[49] = 0;
2794
0
    xmlEncodingErr(XML_I18N_CONV_FAILED,
2795
0
        "output conversion failed due to conv error, bytes %s\n",
2796
0
             buf);
2797
0
    in->content[0] = ' ';
2798
0
          break;
2799
0
      }
2800
2801
0
            out->use += written;
2802
0
            writtentot += written;
2803
0
            out->content[out->use] = 0;
2804
0
            goto retry;
2805
0
  }
2806
0
    }
2807
0
    return(writtentot ? writtentot : ret);
2808
0
}
2809
2810
/**
2811
 * xmlCharEncCloseFunc:
2812
 * @handler:  char encoding transformation data structure
2813
 *
2814
 * Generic front-end for encoding handler close function
2815
 *
2816
 * Returns 0 if success, or -1 in case of error
2817
 */
2818
int
2819
12.9k
xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2820
12.9k
    int ret = 0;
2821
12.9k
    int tofree = 0;
2822
12.9k
    int i = 0;
2823
2824
12.9k
    if (handler == NULL) return(-1);
2825
12.9k
    if (handler->name == NULL) return(-1);
2826
2827
62.3k
    for (i = 0; i < (int) NUM_DEFAULT_HANDLERS; i++) {
2828
61.5k
        if (handler == &defaultHandlers[i])
2829
12.1k
            return(0);
2830
61.5k
    }
2831
2832
861
    if (handlers != NULL) {
2833
0
        for (i = 0;i < nbCharEncodingHandler; i++) {
2834
0
            if (handler == handlers[i])
2835
0
                return(0);
2836
0
  }
2837
0
    }
2838
861
#ifdef LIBXML_ICONV_ENABLED
2839
    /*
2840
     * Iconv handlers can be used only once, free the whole block.
2841
     * and the associated icon resources.
2842
     */
2843
861
    if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
2844
861
        tofree = 1;
2845
861
  if (handler->iconv_out != NULL) {
2846
861
      if (iconv_close(handler->iconv_out))
2847
0
    ret = -1;
2848
861
      handler->iconv_out = NULL;
2849
861
  }
2850
861
  if (handler->iconv_in != NULL) {
2851
861
      if (iconv_close(handler->iconv_in))
2852
0
    ret = -1;
2853
861
      handler->iconv_in = NULL;
2854
861
  }
2855
861
    }
2856
861
#endif /* LIBXML_ICONV_ENABLED */
2857
#ifdef LIBXML_ICU_ENABLED
2858
    if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) {
2859
        tofree = 1;
2860
  if (handler->uconv_out != NULL) {
2861
      closeIcuConverter(handler->uconv_out);
2862
      handler->uconv_out = NULL;
2863
  }
2864
  if (handler->uconv_in != NULL) {
2865
      closeIcuConverter(handler->uconv_in);
2866
      handler->uconv_in = NULL;
2867
  }
2868
    }
2869
#endif
2870
861
    if (tofree) {
2871
        /* free up only dynamic handlers iconv/uconv */
2872
861
        if (handler->name != NULL)
2873
861
            xmlFree(handler->name);
2874
861
        handler->name = NULL;
2875
861
        xmlFree(handler);
2876
861
    }
2877
#ifdef DEBUG_ENCODING
2878
    if (ret)
2879
        xmlGenericError(xmlGenericErrorContext,
2880
    "failed to close the encoding handler\n");
2881
    else
2882
        xmlGenericError(xmlGenericErrorContext,
2883
    "closed the encoding handler\n");
2884
#endif
2885
2886
861
    return(ret);
2887
861
}
2888
2889
/**
2890
 * xmlByteConsumed:
2891
 * @ctxt: an XML parser context
2892
 *
2893
 * This function provides the current index of the parser relative
2894
 * to the start of the current entity. This function is computed in
2895
 * bytes from the beginning starting at zero and finishing at the
2896
 * size in byte of the file if parsing a file. The function is
2897
 * of constant cost if the input is UTF-8 but can be costly if run
2898
 * on non-UTF-8 input.
2899
 *
2900
 * Returns the index in bytes from the beginning of the entity or -1
2901
 *         in case the index could not be computed.
2902
 */
2903
long
2904
0
xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2905
0
    xmlParserInputPtr in;
2906
2907
0
    if (ctxt == NULL) return(-1);
2908
0
    in = ctxt->input;
2909
0
    if (in == NULL)  return(-1);
2910
0
    if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2911
0
        unsigned int unused = 0;
2912
0
  xmlCharEncodingHandler * handler = in->buf->encoder;
2913
        /*
2914
   * Encoding conversion, compute the number of unused original
2915
   * bytes from the input not consumed and subtract that from
2916
   * the raw consumed value, this is not a cheap operation
2917
   */
2918
0
        if (in->end - in->cur > 0) {
2919
0
      unsigned char convbuf[32000];
2920
0
      const unsigned char *cur = (const unsigned char *)in->cur;
2921
0
      int toconv = in->end - in->cur, written = 32000;
2922
2923
0
      int ret;
2924
2925
0
            do {
2926
0
                toconv = in->end - cur;
2927
0
                written = 32000;
2928
0
                ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2929
0
                                        cur, &toconv);
2930
0
                if (ret < 0) {
2931
0
                    if (written > 0)
2932
0
                        ret = -2;
2933
0
                    else
2934
0
                        return(-1);
2935
0
                }
2936
0
                unused += written;
2937
0
                cur += toconv;
2938
0
            } while (ret == -2);
2939
0
  }
2940
0
  if (in->buf->rawconsumed < unused)
2941
0
      return(-1);
2942
0
  return(in->buf->rawconsumed - unused);
2943
0
    }
2944
0
    return(in->consumed + (in->cur - in->base));
2945
0
}
2946
2947
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2948
#ifdef LIBXML_ISO8859X_ENABLED
2949
2950
/**
2951
 * UTF8ToISO8859x:
2952
 * @out:  a pointer to an array of bytes to store the result
2953
 * @outlen:  the length of @out
2954
 * @in:  a pointer to an array of UTF-8 chars
2955
 * @inlen:  the length of @in
2956
 * @xlattable: the 2-level transcoding table
2957
 *
2958
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
2959
 * block of chars out.
2960
 *
2961
 * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
2962
 * The value of @inlen after return is the number of octets consumed
2963
 *     as the return value is positive, else unpredictable.
2964
 * The value of @outlen after return is the number of octets consumed.
2965
 */
2966
static int
2967
UTF8ToISO8859x(unsigned char* out, int *outlen,
2968
              const unsigned char* in, int *inlen,
2969
              const unsigned char* const xlattable) {
2970
    const unsigned char* outstart = out;
2971
    const unsigned char* inend;
2972
    const unsigned char* instart = in;
2973
    const unsigned char* processed = in;
2974
2975
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
2976
        (xlattable == NULL))
2977
  return(-1);
2978
    if (in == NULL) {
2979
        /*
2980
        * initialization nothing to do
2981
        */
2982
        *outlen = 0;
2983
        *inlen = 0;
2984
        return(0);
2985
    }
2986
    inend = in + (*inlen);
2987
    while (in < inend) {
2988
        unsigned char d = *in++;
2989
        if  (d < 0x80)  {
2990
            *out++ = d;
2991
        } else if (d < 0xC0) {
2992
            /* trailing byte in leading position */
2993
            *outlen = out - outstart;
2994
            *inlen = processed - instart;
2995
            return(-2);
2996
        } else if (d < 0xE0) {
2997
            unsigned char c;
2998
            if (!(in < inend)) {
2999
                /* trailing byte not in input buffer */
3000
                *outlen = out - outstart;
3001
                *inlen = processed - instart;
3002
                return(-3);
3003
            }
3004
            c = *in++;
3005
            if ((c & 0xC0) != 0x80) {
3006
                /* not a trailing byte */
3007
                *outlen = out - outstart;
3008
                *inlen = processed - instart;
3009
                return(-2);
3010
            }
3011
            c = c & 0x3F;
3012
            d = d & 0x1F;
3013
            d = xlattable [48 + c + xlattable [d] * 64];
3014
            if (d == 0) {
3015
                /* not in character set */
3016
                *outlen = out - outstart;
3017
                *inlen = processed - instart;
3018
                return(-2);
3019
            }
3020
            *out++ = d;
3021
        } else if (d < 0xF0) {
3022
            unsigned char c1;
3023
            unsigned char c2;
3024
            if (!(in < inend - 1)) {
3025
                /* trailing bytes not in input buffer */
3026
                *outlen = out - outstart;
3027
                *inlen = processed - instart;
3028
                return(-3);
3029
            }
3030
            c1 = *in++;
3031
            if ((c1 & 0xC0) != 0x80) {
3032
                /* not a trailing byte (c1) */
3033
                *outlen = out - outstart;
3034
                *inlen = processed - instart;
3035
                return(-2);
3036
            }
3037
            c2 = *in++;
3038
            if ((c2 & 0xC0) != 0x80) {
3039
                /* not a trailing byte (c2) */
3040
                *outlen = out - outstart;
3041
                *inlen = processed - instart;
3042
                return(-2);
3043
            }
3044
            c1 = c1 & 0x3F;
3045
            c2 = c2 & 0x3F;
3046
      d = d & 0x0F;
3047
      d = xlattable [48 + c2 + xlattable [48 + c1 +
3048
      xlattable [32 + d] * 64] * 64];
3049
            if (d == 0) {
3050
                /* not in character set */
3051
                *outlen = out - outstart;
3052
                *inlen = processed - instart;
3053
                return(-2);
3054
            }
3055
            *out++ = d;
3056
        } else {
3057
            /* cannot transcode >= U+010000 */
3058
            *outlen = out - outstart;
3059
            *inlen = processed - instart;
3060
            return(-2);
3061
        }
3062
        processed = in;
3063
    }
3064
    *outlen = out - outstart;
3065
    *inlen = processed - instart;
3066
    return(*outlen);
3067
}
3068
3069
/**
3070
 * ISO8859xToUTF8
3071
 * @out:  a pointer to an array of bytes to store the result
3072
 * @outlen:  the length of @out
3073
 * @in:  a pointer to an array of ISO Latin 1 chars
3074
 * @inlen:  the length of @in
3075
 *
3076
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
3077
 * block of chars out.
3078
 * Returns 0 if success, or -1 otherwise
3079
 * The value of @inlen after return is the number of octets consumed
3080
 * The value of @outlen after return is the number of octets produced.
3081
 */
3082
static int
3083
ISO8859xToUTF8(unsigned char* out, int *outlen,
3084
              const unsigned char* in, int *inlen,
3085
              unsigned short const *unicodetable) {
3086
    unsigned char* outstart = out;
3087
    unsigned char* outend;
3088
    const unsigned char* instart = in;
3089
    const unsigned char* inend;
3090
    const unsigned char* instop;
3091
    unsigned int c;
3092
3093
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
3094
        (in == NULL) || (unicodetable == NULL))
3095
  return(-1);
3096
    outend = out + *outlen;
3097
    inend = in + *inlen;
3098
    instop = inend;
3099
3100
    while ((in < inend) && (out < outend - 2)) {
3101
        if (*in >= 0x80) {
3102
            c = unicodetable [*in - 0x80];
3103
            if (c == 0) {
3104
                /* undefined code point */
3105
                *outlen = out - outstart;
3106
                *inlen = in - instart;
3107
                return (-1);
3108
            }
3109
            if (c < 0x800) {
3110
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
3111
                *out++ = (c & 0x3F) | 0x80;
3112
            } else {
3113
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
3114
                *out++ = ((c >>  6) & 0x3F) | 0x80;
3115
                *out++ = (c & 0x3F) | 0x80;
3116
            }
3117
            ++in;
3118
        }
3119
        if (instop - in > outend - out) instop = in + (outend - out);
3120
        while ((*in < 0x80) && (in < instop)) {
3121
            *out++ = *in++;
3122
        }
3123
    }
3124
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
3125
        *out++ =  *in++;
3126
    }
3127
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
3128
        *out++ =  *in++;
3129
    }
3130
    *outlen = out - outstart;
3131
    *inlen = in - instart;
3132
    return (*outlen);
3133
}
3134
3135
3136
/************************************************************************
3137
 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
3138
 ************************************************************************/
3139
3140
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
3141
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3142
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3143
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3144
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3145
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
3146
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
3147
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
3148
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
3149
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
3150
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
3151
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
3152
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
3153
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
3154
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
3155
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
3156
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
3157
};
3158
3159
static const unsigned char xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
3160
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3161
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3162
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3163
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3164
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3165
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3166
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3167
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3168
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3169
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3170
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3171
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
3172
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
3173
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3174
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
3175
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3176
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
3177
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3178
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3179
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
3180
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
3181
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
3182
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
3183
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3184
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
3185
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
3186
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
3187
};
3188
3189
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
3190
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3191
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3192
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3193
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3194
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
3195
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
3196
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
3197
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
3198
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
3199
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3200
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
3201
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
3202
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
3203
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3204
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
3205
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
3206
};
3207
3208
static const unsigned char xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
3209
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3210
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3211
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3212
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3213
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3214
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3215
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3216
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3217
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3218
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3219
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
3220
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
3221
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
3222
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
3223
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3224
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3225
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
3226
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3227
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3228
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3229
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3230
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3231
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3232
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3233
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
3234
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
3235
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
3236
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3237
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3238
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3239
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
3240
};
3241
3242
static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
3243
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3244
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3245
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3246
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3247
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
3248
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
3249
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
3250
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
3251
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3252
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
3253
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3254
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
3255
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3256
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
3257
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3258
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
3259
};
3260
3261
static const unsigned char xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
3262
    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
3263
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3264
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3265
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3266
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3267
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3268
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3269
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3270
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3271
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
3272
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3273
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3274
    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3275
    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
3276
    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
3277
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
3278
    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
3279
    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
3280
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
3281
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3282
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
3283
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3284
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3285
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3286
    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
3287
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
3288
    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
3289
};
3290
3291
static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
3292
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3293
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3294
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3295
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3296
    0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
3297
    0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
3298
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
3299
    0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
3300
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
3301
    0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
3302
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
3303
    0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
3304
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
3305
    0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
3306
    0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
3307
    0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
3308
};
3309
3310
static const unsigned char xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
3311
    "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3312
    "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3313
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3314
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3315
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3316
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3317
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3318
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3319
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3320
    "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
3321
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3322
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
3323
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3324
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3325
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3326
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3327
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
3328
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3329
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3330
    "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3331
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3332
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3333
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3334
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3335
    "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3336
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3337
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3338
};
3339
3340
static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
3341
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3342
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3343
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3344
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3345
    0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
3346
    0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
3347
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3348
    0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
3349
    0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
3350
    0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3351
    0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3352
    0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3353
    0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3354
    0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3355
    0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3356
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3357
};
3358
3359
static const unsigned char xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
3360
    "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3361
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3362
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3363
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3364
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3365
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3366
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3367
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3368
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3369
    "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3370
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3371
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3372
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3373
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3374
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3375
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3376
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3377
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3378
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3379
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3380
    "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3381
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3382
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3383
};
3384
3385
static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
3386
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3387
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3388
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3389
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3390
    0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3391
    0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3392
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3393
    0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3394
    0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3395
    0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3396
    0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3397
    0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3398
    0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3399
    0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3400
    0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3401
    0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3402
};
3403
3404
static const unsigned char xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3405
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3406
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3407
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3408
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3409
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3410
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3411
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3412
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3413
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3414
    "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3415
    "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3416
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3417
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3418
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3419
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3420
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3421
    "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3422
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3423
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3424
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3425
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3426
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3427
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3428
    "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3429
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3430
    "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3431
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3432
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3433
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3434
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3435
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3436
};
3437
3438
static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
3439
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3440
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3441
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3442
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3443
    0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3444
    0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3445
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3446
    0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3447
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3448
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3449
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3450
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3451
    0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3452
    0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3453
    0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3454
    0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3455
};
3456
3457
static const unsigned char xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3458
    "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3459
    "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3460
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3461
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3462
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3463
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3464
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3465
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3466
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3467
    "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3468
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3469
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3470
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3471
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3472
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3473
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3474
    "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3475
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3476
    "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3477
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3478
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3479
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3480
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3481
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3482
    "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3483
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3484
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3485
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3486
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3487
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3488
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3489
};
3490
3491
static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
3492
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3493
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3494
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3495
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3496
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3497
    0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3498
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3499
    0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3500
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3501
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3502
    0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3503
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3504
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3505
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3506
    0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3507
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3508
};
3509
3510
static const unsigned char xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3511
    "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3512
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3513
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3514
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3515
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3516
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3517
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3518
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3519
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3520
    "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3521
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3522
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3523
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3524
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3525
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3526
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3527
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3528
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3529
    "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3530
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3531
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3532
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3533
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3534
};
3535
3536
static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3537
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3538
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3539
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3540
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3541
    0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3542
    0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3543
    0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3544
    0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3545
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3546
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3547
    0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3548
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3549
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3550
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3551
    0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3552
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3553
};
3554
3555
static const unsigned char xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3556
    "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3557
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3558
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3559
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3560
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3561
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3562
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3563
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3564
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3565
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3566
    "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3567
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3568
    "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3569
    "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3570
    "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3571
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3572
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3573
    "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3574
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3575
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3576
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3577
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3578
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3579
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3580
    "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3581
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3582
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3583
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3584
    "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3585
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3586
    "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3587
};
3588
3589
static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3590
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3591
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3592
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3593
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3594
    0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3595
    0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3596
    0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3597
    0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3598
    0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3599
    0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3600
    0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3601
    0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3602
    0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3603
    0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3604
    0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3605
    0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3606
};
3607
3608
static const unsigned char xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3609
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3610
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3611
    "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3612
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3613
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3614
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3615
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3616
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3617
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3618
    "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3619
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3620
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3621
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3622
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3623
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3624
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3625
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3626
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3627
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3628
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3629
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3630
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3631
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3632
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3633
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3634
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3635
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3636
};
3637
3638
static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3639
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3640
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3641
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3642
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3643
    0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3644
    0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3645
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3646
    0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3647
    0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3648
    0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3649
    0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3650
    0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3651
    0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3652
    0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3653
    0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3654
    0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3655
};
3656
3657
static const unsigned char xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3658
    "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3659
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3660
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3661
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3662
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3663
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3664
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3665
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3666
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3667
    "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3668
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3669
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3670
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3671
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3672
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3673
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3674
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3675
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3676
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3677
    "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3678
    "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3679
    "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3680
    "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3681
    "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3682
    "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3683
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3684
    "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3685
    "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3686
    "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3687
    "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3688
    "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3689
};
3690
3691
static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
3692
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3693
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3694
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3695
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3696
    0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3697
    0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3698
    0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3699
    0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3700
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3701
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3702
    0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3703
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3704
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3705
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3706
    0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3707
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
3708
};
3709
3710
static const unsigned char xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
3711
    "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3712
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3713
    "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3714
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3715
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3716
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3717
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3718
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3719
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3720
    "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3721
    "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3722
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3723
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3724
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3725
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3726
    "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3727
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3728
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3729
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3730
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3731
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3732
    "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3733
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3734
    "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3735
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3736
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3737
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3738
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3739
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3740
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3741
    "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3742
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3743
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3744
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3745
    "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3746
    "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3747
    "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3748
    "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3749
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3750
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3751
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3752
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3753
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3754
};
3755
3756
static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
3757
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3758
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3759
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3760
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3761
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3762
    0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3763
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3764
    0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3765
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3766
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3767
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3768
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3769
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3770
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3771
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3772
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
3773
};
3774
3775
static const unsigned char xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
3776
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3777
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3778
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3779
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3780
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3781
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3782
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3783
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3784
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3785
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3786
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3787
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3788
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3789
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3790
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3791
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3792
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3793
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3794
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3795
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3796
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3797
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3798
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3799
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3800
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3801
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3802
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
3803
};
3804
3805
static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
3806
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3807
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3808
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3809
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3810
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3811
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3812
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3813
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3814
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3815
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3816
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3817
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3818
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3819
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3820
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3821
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
3822
};
3823
3824
static const unsigned char xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
3825
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3826
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3827
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3828
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3829
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3830
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3831
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3832
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3833
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3834
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3835
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3836
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3837
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3838
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3839
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3840
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3841
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3842
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3843
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3844
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3845
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3846
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3847
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3848
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3849
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3850
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3851
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3852
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3853
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3854
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3855
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3856
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3857
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3858
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3859
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3860
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3861
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3862
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3863
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3864
};
3865
3866
3867
/*
3868
 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3869
 */
3870
3871
static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3872
    const unsigned char* in, int *inlen) {
3873
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3874
}
3875
static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3876
    const unsigned char* in, int *inlen) {
3877
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3878
}
3879
3880
static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3881
    const unsigned char* in, int *inlen) {
3882
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3883
}
3884
static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3885
    const unsigned char* in, int *inlen) {
3886
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3887
}
3888
3889
static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3890
    const unsigned char* in, int *inlen) {
3891
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3892
}
3893
static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3894
    const unsigned char* in, int *inlen) {
3895
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3896
}
3897
3898
static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3899
    const unsigned char* in, int *inlen) {
3900
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3901
}
3902
static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3903
    const unsigned char* in, int *inlen) {
3904
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3905
}
3906
3907
static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3908
    const unsigned char* in, int *inlen) {
3909
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3910
}
3911
static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3912
    const unsigned char* in, int *inlen) {
3913
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3914
}
3915
3916
static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3917
    const unsigned char* in, int *inlen) {
3918
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3919
}
3920
static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3921
    const unsigned char* in, int *inlen) {
3922
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3923
}
3924
3925
static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3926
    const unsigned char* in, int *inlen) {
3927
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3928
}
3929
static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3930
    const unsigned char* in, int *inlen) {
3931
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3932
}
3933
3934
static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3935
    const unsigned char* in, int *inlen) {
3936
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3937
}
3938
static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3939
    const unsigned char* in, int *inlen) {
3940
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3941
}
3942
3943
static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3944
    const unsigned char* in, int *inlen) {
3945
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3946
}
3947
static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3948
    const unsigned char* in, int *inlen) {
3949
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3950
}
3951
3952
static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3953
    const unsigned char* in, int *inlen) {
3954
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3955
}
3956
static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3957
    const unsigned char* in, int *inlen) {
3958
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3959
}
3960
3961
static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3962
    const unsigned char* in, int *inlen) {
3963
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3964
}
3965
static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3966
    const unsigned char* in, int *inlen) {
3967
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3968
}
3969
3970
static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3971
    const unsigned char* in, int *inlen) {
3972
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3973
}
3974
static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3975
    const unsigned char* in, int *inlen) {
3976
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3977
}
3978
3979
static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3980
    const unsigned char* in, int *inlen) {
3981
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3982
}
3983
static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3984
    const unsigned char* in, int *inlen) {
3985
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3986
}
3987
3988
static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3989
    const unsigned char* in, int *inlen) {
3990
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3991
}
3992
static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3993
    const unsigned char* in, int *inlen) {
3994
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3995
}
3996
3997
#endif
3998
#endif
3999