Coverage Report

Created: 2024-08-17 06:44

/src/libxml2/encoding.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * encoding.c : implements the encoding conversion functions needed for XML
3
 *
4
 * Related specs:
5
 * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6
 * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7
 * [ISO-10646]    UTF-8 and UTF-16 in Annexes
8
 * [ISO-8859-1]   ISO Latin-1 characters codes.
9
 * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
10
 *                Worldwide Character Encoding -- Version 1.0", Addison-
11
 *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
12
 *                described in Unicode Technical Report #4.
13
 * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
14
 *                Information Interchange, ANSI X3.4-1986.
15
 *
16
 * See Copyright for the status of this software.
17
 *
18
 * daniel@veillard.com
19
 *
20
 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
21
 */
22
23
#define IN_LIBXML
24
#include "libxml.h"
25
26
#include <string.h>
27
#include <limits.h>
28
#include <ctype.h>
29
#include <stdlib.h>
30
31
#ifdef LIBXML_ICONV_ENABLED
32
#include <errno.h>
33
#endif
34
35
#include <libxml/encoding.h>
36
#include <libxml/xmlmemory.h>
37
#ifdef LIBXML_HTML_ENABLED
38
#include <libxml/HTMLparser.h>
39
#endif
40
#include <libxml/globals.h>
41
#include <libxml/xmlerror.h>
42
43
#include "private/buf.h"
44
#include "private/enc.h"
45
#include "private/error.h"
46
47
#ifdef LIBXML_ICU_ENABLED
48
#include <unicode/ucnv.h>
49
/* Size of pivot buffer, same as icu/source/common/ucnv.cpp CHUNK_SIZE */
50
#define ICU_PIVOT_BUF_SIZE 1024
51
typedef struct _uconv_t uconv_t;
52
struct _uconv_t {
53
  UConverter *uconv; /* for conversion between an encoding and UTF-16 */
54
  UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */
55
  UChar      pivot_buf[ICU_PIVOT_BUF_SIZE];
56
  UChar      *pivot_source;
57
  UChar      *pivot_target;
58
};
59
#endif
60
61
typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
62
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
63
struct _xmlCharEncodingAlias {
64
    const char *name;
65
    const char *alias;
66
};
67
68
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
69
static int xmlCharEncodingAliasesNb = 0;
70
static int xmlCharEncodingAliasesMax = 0;
71
72
#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
73
#if 0
74
#define DEBUG_ENCODING  /* Define this to get encoding traces */
75
#endif
76
#else
77
#endif
78
79
static int xmlLittleEndian = 1;
80
81
/**
82
 * xmlEncodingErrMemory:
83
 * @extra:  extra information, forwarded unchanged to __xmlSimpleError()
84
 *
85
 * Report an out of memory condition (XML_ERR_NO_MEMORY) in the XML_FROM_I18N domain
86
 */
87
static void
88
xmlEncodingErrMemory(const char *extra)
89
0
{
90
0
    __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
91
0
}
92
93
/**
94
 * xmlEncodingErr:
95
 * @error:  the error number
96
 * @msg:  the error message, a format string that receives @val as its argument
97
 *
98
 * Raise an encoding error with level XML_ERR_FATAL in the XML_FROM_I18N domain
99
 */
100
static void LIBXML_ATTR_FORMAT(2,0)
101
xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
102
1.49k
{
103
1.49k
    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
104
1.49k
                    XML_FROM_I18N, error, XML_ERR_FATAL,
105
1.49k
                    NULL, 0, val, NULL, NULL, 0, 0, msg, val);
106
1.49k
}
107
108
#ifdef LIBXML_ICU_ENABLED
109
static uconv_t*
110
openIcuConverter(const char* name, int toUnicode)
111
{
112
  UErrorCode status = U_ZERO_ERROR;
113
  uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
114
  if (conv == NULL)
115
    return NULL;
116
117
  conv->pivot_source = conv->pivot_buf;
118
  conv->pivot_target = conv->pivot_buf;
119
120
  conv->uconv = ucnv_open(name, &status);
121
  if (U_FAILURE(status))
122
    goto error;
123
124
  status = U_ZERO_ERROR;
125
  if (toUnicode) {
126
    ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
127
                        NULL, NULL, NULL, &status);
128
  }
129
  else {
130
    ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
131
                        NULL, NULL, NULL, &status);
132
  }
133
  if (U_FAILURE(status))
134
    goto error;
135
136
  status = U_ZERO_ERROR;
137
  conv->utf8 = ucnv_open("UTF-8", &status);
138
  if (U_SUCCESS(status))
139
    return conv;
140
141
error:
142
  if (conv->uconv)
143
    ucnv_close(conv->uconv);
144
  xmlFree(conv);
145
  return NULL;
146
}
147
148
static void
149
closeIcuConverter(uconv_t *conv)
150
{
151
  if (conv != NULL) {
152
    ucnv_close(conv->uconv);
153
    ucnv_close(conv->utf8);
154
    xmlFree(conv);
155
  }
156
}
157
#endif /* LIBXML_ICU_ENABLED */
158
159
/************************************************************************
160
 *                  *
161
 *    Conversions To/From UTF8 encoding     *
162
 *                  *
163
 ************************************************************************/
164
165
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out. Every ASCII byte maps to the identical UTF-8 byte,
 * so exactly one output byte is needed per input byte and the whole
 * output buffer can be used.
 *
 * Returns the number of bytes written if success, or -1 if a byte
 *     outside the ASCII range (>= 0x80) is found.
 * The value of @inlen after return is the number of octets consumed
 *     (on error, the octets before the offending byte).
 * The value of @outlen after return is the number of octets produced.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char *src = in;
    const unsigned char *srcend = in + *inlen;
    unsigned char *dst = out;
    const unsigned char *dstend = out + *outlen;
    int ret = 0;

    while ((src < srcend) && (dst < dstend)) {
        if (*src >= 0x80) {
            /* Not ASCII: stop before the offending byte. */
            ret = -1;
            break;
        }
        *dst++ = *src++;
    }

    *outlen = (int) (dst - out);
    *inlen = (int) (src - in);
    return((ret < 0) ? ret : *outlen);
}
209
210
#ifdef LIBXML_OUTPUT_ENABLED
211
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out.
 *
 * A NULL @in is an initialization call: both lengths are set to 0.
 * A multi-byte sequence that is cut off by the end of the input is left
 * unconsumed so that the caller can retry once more data is available.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (malformed UTF-8 or a character outside ASCII), or -1 if
 *     @out, @outlen or @inlen is NULL.
 * The value of @inlen after return is the number of octets consumed
 *     (on -2, the octets before the offending character).
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /* initialization, nothing to do */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if (d < 0x80) {
            c = d;
            trailing = 0;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto invalid;
        } else if (d < 0xE0) {
            c = d & 0x1F;
            trailing = 1;
        } else if (d < 0xF0) {
            c = d & 0x0F;
            trailing = 2;
        } else if (d < 0xF8) {
            c = d & 0x07;
            trailing = 3;
        } else {
            /* no chance for this in Ascii */
            goto invalid;
        }

        /* Sequence split across input chunks: wait for the rest. */
        if (inend - in < trailing)
            break;

        for ( ; trailing > 0; trailing--) {
            d = *in++;
            /*
             * A non-continuation byte here means malformed input. Fail
             * instead of emitting the partially decoded value.
             */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c = (c << 6) | (d & 0x3F);
        }

        /* c is now a single decoded code point */
        if (c >= 0x80) {
            /* no chance for this in Ascii */
            goto invalid;
        }
        if (out >= outend)
            break;
        *out++ = c;
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(-2);
}
294
#endif /* LIBXML_OUTPUT_ENABLED */
295
296
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied as is, bytes from 0x80
 * to 0xFF become a two-byte UTF-8 sequence. Conversion stops at the first
 * character that no longer fits completely into @out.
 *
 * Returns the number of bytes written if success, or -1 if one of the
 *     arguments is NULL.
 * The value of @inlen after return is the number of octets consumed
 *     if the return value is positive, else unpredictable.
 * The value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char *dst = out;
    const unsigned char *src = in;
    const unsigned char *outend;
    const unsigned char *inend;
    unsigned int c;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    outend = out + *outlen;
    inend = in + *inlen;

    while (src < inend) {
        c = *src;
        if (c < 0x80) {
            if (dst >= outend)
                break;
            *dst++ = c;
        } else {
            /*
             * Compare remaining space instead of computing outend - 1,
             * which would point before the buffer when *outlen is 0.
             */
            if (outend - dst < 2)
                break;
            *dst++ = 0xC0 | (c >> 6);
            *dst++ = 0x80 | (c & 0x3F);
        }
        src++;
    }

    *outlen = dst - out;
    *inlen = src - in;
    return(*outlen);
}
344
345
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * Pass-through "conversion": copies as many bytes as fit from @inb
 * to @out without looking at them.
 *
 * Returns the number of bytes written, or -1 if an argument is NULL or
 *     a length is negative.
 *     The values of *outlen and *inlenb after return are both the number
 *     of octets copied if the return value is not negative.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int count;

    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);

    if (inb == NULL) {
        /* A NULL input is the initialization call: nothing to emit. */
        *inlenb = 0;
        *outlen = 0;
        return(0);
    }

    /* Copy the smaller of "input available" and "output space". */
    count = (*inlenb < *outlen) ? *inlenb : *outlen;
    if (count < 0)
        return(-1);

    /*
     * FIXME: Conversion functions must assure valid UTF-8, so we have
     * to check for UTF-8 validity. Preferably, this converter shouldn't
     * be used at all.
     */
    memcpy(out, inb, count);

    *inlenb = count;
    *outlen = count;
    return(count);
}
391
392
393
#ifdef LIBXML_OUTPUT_ENABLED
394
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out. A NULL @in is an initialization call that only
 * resets both lengths to 0.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (malformed sequence or code point above 0xFF), or -1 if
 *     @out, @outlen or @inlen is NULL.
 * The value of @inlen after return is the number of octets consumed
 *     if the return value is positive, else unpredictable.
 * The value of @outlen after return is the number of octets produced.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char *src = in;
    const unsigned char *committed = in;
    const unsigned char *srcend;
    unsigned char *dst = out;
    const unsigned char *dstend;
    unsigned int lead, next, value;
    int extra;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    srcend = in + (*inlen);
    dstend = out + (*outlen);

    while (src < srcend) {
        lead = *src++;

        /* Classify the lead byte: payload bits and continuation count. */
        if (lead < 0x80) {
            value = lead;
            extra = 0;
        } else if (lead < 0xC0) {
            goto reject;        /* continuation byte in lead position */
        } else if (lead < 0xE0) {
            value = lead & 0x1F;
            extra = 1;
        } else if (lead < 0xF0) {
            value = lead & 0x0F;
            extra = 2;
        } else if (lead < 0xF8) {
            value = lead & 0x07;
            extra = 3;
        } else {
            goto reject;        /* cannot be represented in Latin 1 */
        }

        /* Incomplete sequence at end of input: leave it unconsumed. */
        if (srcend - src < extra)
            break;

        while (extra > 0) {
            next = *src++;
            if ((next & 0xC0) != 0x80)
                goto reject;
            value = (value << 6) | (next & 0x3F);
            extra--;
        }

        /* value is now one complete code point */
        if (value > 0xFF)
            goto reject;
        if (dst >= dstend)
            break;
        *dst++ = value;
        committed = src;
    }

    *outlen = dst - out;
    *inlen = committed - in;
    return(*outlen);

reject:
    *outlen = dst - out;
    *inlen = committed - in;
    return(-2);
}
483
#endif /* LIBXML_OUTPUT_ENABLED */
484
485
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an UTF-8
 * block of chars out. The input is read byte by byte, so the result does
 * not depend on the byte order or alignment rules of the host.
 *
 * A trailing odd byte and a high surrogate whose partner has not arrived
 * yet are left unconsumed. Conversion also stops before the first
 * character that does not fit completely into @out.
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 *     (unpaired surrogate in the input).
 *     The value of *inlenb after return is the number of octets consumed
 *     (on -2, the octets before the offending code unit).
 *     The value of *outlen after return is the number of octets produced.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* in = inb;
    const unsigned char* next;
    const unsigned char* inend;
    unsigned int c, d;

    if (*outlen == 0) {
        *inlenb = 0;
        return(0);
    }
    outend = out + *outlen;
    /* Only whole 16-bit code units can be converted. */
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + *inlenb;

    while (in < inend) {
        c = in[0] | ((unsigned int) in[1] << 8);
        next = in + 2;

        if ((c & 0xF800) == 0xD800) {    /* surrogates */
            if ((c & 0xFC00) != 0xD800)
                goto invalid;            /* low surrogate without a high one */
            if (next >= inend)
                break;                   /* handle split multi-byte characters */
            d = next[0] | ((unsigned int) next[1] << 8);
            if ((d & 0xFC00) != 0xDC00)
                goto invalid;
            next += 2;
            c = 0x10000 + (((c & 0x03FF) << 10) | (d & 0x03FF));
        }

        /* c is a single code point; emit it only if it fits entirely */
        if (c < 0x80) {
            if (outend - out < 1)
                break;
            *out++ = c;
        } else if (c < 0x800) {
            if (outend - out < 2)
                break;
            *out++ = 0xC0 | (c >> 6);
            *out++ = 0x80 | (c & 0x3F);
        } else if (c < 0x10000) {
            if (outend - out < 3)
                break;
            *out++ = 0xE0 | (c >> 12);
            *out++ = 0x80 | ((c >> 6) & 0x3F);
            *out++ = 0x80 | (c & 0x3F);
        } else {
            if (outend - out < 4)
                break;
            *out++ = 0xF0 | (c >> 18);
            *out++ = 0x80 | ((c >> 12) & 0x3F);
            *out++ = 0x80 | ((c >> 6) & 0x3F);
            *out++ = 0x80 | (c & 0x3F);
        }
        in = next;
    }
    *outlen = out - outstart;
    *inlenb = in - inb;
    return(*outlen);

invalid:
    *outlen = out - outstart;
    *inlenb = in - inb;
    return(-2);
}
577
578
#ifdef LIBXML_OUTPUT_ENABLED
579
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. No byte order mark is written. The output is
 * stored byte by byte, so the result does not depend on the byte order
 * or alignment rules of the host.
 *
 * A NULL @in is an initialization call that sets both lengths to 0.
 * A multi-byte sequence cut off by the end of the input is left
 * unconsumed, and conversion stops before the first character that does
 * not fit completely into @outb.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 *     NULL, or -2 if the transcoding failed (malformed UTF-8 or a code
 *     point above 0x10FFFF).
 *     The value of *inlen after return is the number of octets consumed
 *     (on -2, the octets before the offending character).
 *     The value of *outlen after return is the number of octets produced.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    const unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d, hi, lo;
    int trailing;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* Only whole 16-bit code units can be stored. */
    outend = outb + (*outlen / 2) * 2;

    while (in < inend) {
        d = *in++;
        if (d < 0x80) {
            c = d;
            trailing = 0;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto invalid;
        } else if (d < 0xE0) {
            c = d & 0x1F;
            trailing = 1;
        } else if (d < 0xF0) {
            c = d & 0x0F;
            trailing = 2;
        } else if (d < 0xF8) {
            c = d & 0x07;
            trailing = 3;
        } else {
            /* no chance for this in UTF-16 */
            goto invalid;
        }

        /* Sequence split across input chunks: wait for the rest. */
        if (inend - in < trailing)
            break;

        for ( ; trailing > 0; trailing--) {
            d = *in++;
            /* Malformed sequence: fail rather than emit a partial value. */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c = (c << 6) | (d & 0x3F);
        }

        /* c is a single decoded code point */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            out[0] = c & 0xFF;
            out[1] = c >> 8;
            out += 2;
        } else if (c < 0x110000) {
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            out[0] = hi & 0xFF;
            out[1] = hi >> 8;
            out[2] = lo & 0xFF;
            out[3] = lo >> 8;
            out += 4;
        } else {
            /* beyond the Unicode range, not encodable in UTF-16 */
            goto invalid;
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
687
688
/**
 * UTF8ToUTF16:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
 * block of chars out.
 *
 * A NULL @in is the initialization call: the bytes 0xFF 0xFE (the
 * little-endian byte order mark) are written if @outb has room for
 * them, otherwise nothing is written. Any other input is handed to
 * UTF8ToUTF16LE().
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen
 *     is NULL, or -2 if the transcoding failed.
 */
static int
UTF8ToUTF16(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    /* Same argument contract as UTF8ToUTF16LE(), checked before any use. */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    if (in == NULL) {
	/*
	 * initialization, add the Byte Order Mark for UTF-16LE
	 */
        if (*outlen >= 2) {
	    outb[0] = 0xFF;
	    outb[1] = 0xFE;
	    *outlen = 2;
	    *inlen = 0;
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
		    "Added FFFE Byte Order Mark\n");
#endif
	    return(2);
	}
	*outlen = 0;
	*inlen = 0;
	return(0);
    }
    return (UTF8ToUTF16LE(outb, outlen, in, inlen));
}
726
#endif /* LIBXML_OUTPUT_ENABLED */
727
728
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an UTF-8
 * block of chars out. The input is read byte by byte, so the result does
 * not depend on the byte order or alignment rules of the host.
 *
 * A trailing odd byte and a high surrogate whose partner has not arrived
 * yet are left unconsumed. Conversion also stops before the first
 * character that does not fit completely into @out.
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 *     (unpaired surrogate in the input).
 * The value of *inlenb after return is the number of octets consumed
 *     (on -2, the octets before the offending code unit).
 * The value of *outlen after return is the number of octets produced.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* in = inb;
    const unsigned char* next;
    const unsigned char* inend;
    unsigned int c, d;

    if (*outlen == 0) {
        *inlenb = 0;
        return(0);
    }
    outend = out + *outlen;
    /* Only whole 16-bit code units can be converted. */
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + *inlenb;

    while (in < inend) {
        c = ((unsigned int) in[0] << 8) | in[1];
        next = in + 2;

        if ((c & 0xF800) == 0xD800) {    /* surrogates */
            if ((c & 0xFC00) != 0xD800)
                goto invalid;            /* low surrogate without a high one */
            if (next >= inend)
                break;                   /* handle split multi-byte characters */
            d = ((unsigned int) next[0] << 8) | next[1];
            if ((d & 0xFC00) != 0xDC00)
                goto invalid;
            next += 2;
            c = 0x10000 + (((c & 0x03FF) << 10) | (d & 0x03FF));
        }

        /* c is a single code point; emit it only if it fits entirely */
        if (c < 0x80) {
            if (outend - out < 1)
                break;
            *out++ = c;
        } else if (c < 0x800) {
            if (outend - out < 2)
                break;
            *out++ = 0xC0 | (c >> 6);
            *out++ = 0x80 | (c & 0x3F);
        } else if (c < 0x10000) {
            if (outend - out < 3)
                break;
            *out++ = 0xE0 | (c >> 12);
            *out++ = 0x80 | ((c >> 6) & 0x3F);
            *out++ = 0x80 | (c & 0x3F);
        } else {
            if (outend - out < 4)
                break;
            *out++ = 0xF0 | (c >> 18);
            *out++ = 0x80 | ((c >> 12) & 0x3F);
            *out++ = 0x80 | ((c >> 6) & 0x3F);
            *out++ = 0x80 | (c & 0x3F);
        }
        in = next;
    }
    *outlen = out - outstart;
    *inlenb = in - inb;
    return(*outlen);

invalid:
    *outlen = out - outstart;
    *inlenb = in - inb;
    return(-2);
}
820
821
#ifdef LIBXML_OUTPUT_ENABLED
822
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. No byte order mark is written. The output is
 * stored byte by byte, so the result does not depend on the byte order
 * or alignment rules of the host.
 *
 * A NULL @in is an initialization call that sets both lengths to 0.
 * A multi-byte sequence cut off by the end of the input is left
 * unconsumed, and conversion stops before the first character that does
 * not fit completely into @outb.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 *     NULL, or -2 if the transcoding failed (malformed UTF-8 or a code
 *     point above 0x10FFFF).
 *     The value of *inlen after return is the number of octets consumed
 *     (on -2, the octets before the offending character).
 *     The value of *outlen after return is the number of octets produced,
 *     on every return path.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    const unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d, hi, lo;
    int trailing;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* Only whole 16-bit code units can be stored. */
    outend = outb + (*outlen / 2) * 2;

    while (in < inend) {
        d = *in++;
        if (d < 0x80) {
            c = d;
            trailing = 0;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto invalid;
        } else if (d < 0xE0) {
            c = d & 0x1F;
            trailing = 1;
        } else if (d < 0xF0) {
            c = d & 0x0F;
            trailing = 2;
        } else if (d < 0xF8) {
            c = d & 0x07;
            trailing = 3;
        } else {
            /* no chance for this in UTF-16 */
            goto invalid;
        }

        /* Sequence split across input chunks: wait for the rest. */
        if (inend - in < trailing)
            break;

        for ( ; trailing > 0; trailing--) {
            d = *in++;
            /* Malformed sequence: fail rather than emit a partial value. */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c = (c << 6) | (d & 0x3F);
        }

        /* c is a single decoded code point */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            out[0] = c >> 8;
            out[1] = c & 0xFF;
            out += 2;
        } else if (c < 0x110000) {
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            out[0] = hi >> 8;
            out[1] = hi & 0xFF;
            out[2] = lo >> 8;
            out[3] = lo & 0xFF;
            out += 4;
        } else {
            /* beyond the Unicode range, not encodable in UTF-16 */
            goto invalid;
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    /* out advances in bytes, so this is a byte count like the success path */
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
927
#endif /* LIBXML_OUTPUT_ENABLED */
928
929
/************************************************************************
930
 *                  *
931
 *    Generic encoding handling routines      *
932
 *                  *
933
 ************************************************************************/
934
935
/**
936
 * xmlDetectCharEncoding:
937
 * @in:  a pointer to the first bytes of the XML entity, must be at least
938
 *       2 bytes long (at least 4 if encoding is UTF4 variant).
939
 * @len:  pointer to the length of the buffer
940
 *
941
 * Guess the encoding of the entity using the first bytes of the entity content
942
 * according to the non-normative appendix F of the XML-1.0 recommendation.
943
 *
944
 * Returns one of the XML_CHAR_ENCODING_... values.
945
 */
946
xmlCharEncoding
947
xmlDetectCharEncoding(const unsigned char* in, int len)
948
60.1k
{
949
60.1k
    if (in == NULL)
950
0
        return(XML_CHAR_ENCODING_NONE);
951
60.1k
    if (len >= 4) {
952
60.1k
  if ((in[0] == 0x00) && (in[1] == 0x00) &&
953
60.1k
      (in[2] == 0x00) && (in[3] == 0x3C))
954
135
      return(XML_CHAR_ENCODING_UCS4BE);
955
60.0k
  if ((in[0] == 0x3C) && (in[1] == 0x00) &&
956
60.0k
      (in[2] == 0x00) && (in[3] == 0x00))
957
126
      return(XML_CHAR_ENCODING_UCS4LE);
958
59.9k
  if ((in[0] == 0x00) && (in[1] == 0x00) &&
959
59.9k
      (in[2] == 0x3C) && (in[3] == 0x00))
960
9
      return(XML_CHAR_ENCODING_UCS4_2143);
961
59.9k
  if ((in[0] == 0x00) && (in[1] == 0x3C) &&
962
59.9k
      (in[2] == 0x00) && (in[3] == 0x00))
963
15
      return(XML_CHAR_ENCODING_UCS4_3412);
964
59.9k
  if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
965
59.9k
      (in[2] == 0xA7) && (in[3] == 0x94))
966
6
      return(XML_CHAR_ENCODING_EBCDIC);
967
59.9k
  if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
968
59.9k
      (in[2] == 0x78) && (in[3] == 0x6D))
969
26.3k
      return(XML_CHAR_ENCODING_UTF8);
970
  /*
971
   * Although not part of the recommendation, we also
972
   * attempt an "auto-recognition" of UTF-16LE and
973
   * UTF-16BE encodings.
974
   */
975
33.5k
  if ((in[0] == 0x3C) && (in[1] == 0x00) &&
976
33.5k
      (in[2] == 0x3F) && (in[3] == 0x00))
977
462
      return(XML_CHAR_ENCODING_UTF16LE);
978
33.0k
  if ((in[0] == 0x00) && (in[1] == 0x3C) &&
979
33.0k
      (in[2] == 0x00) && (in[3] == 0x3F))
980
30
      return(XML_CHAR_ENCODING_UTF16BE);
981
33.0k
    }
982
33.0k
    if (len >= 3) {
983
  /*
984
   * Errata on XML-1.0 June 20 2001
985
   * We now allow an UTF8 encoded BOM
986
   */
987
33.0k
  if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
988
33.0k
      (in[2] == 0xBF))
989
18
      return(XML_CHAR_ENCODING_UTF8);
990
33.0k
    }
991
    /* For UTF-16 we can recognize by the BOM */
992
32.9k
    if (len >= 2) {
993
32.9k
  if ((in[0] == 0xFE) && (in[1] == 0xFF))
994
939
      return(XML_CHAR_ENCODING_UTF16BE);
995
32.0k
  if ((in[0] == 0xFF) && (in[1] == 0xFE))
996
939
      return(XML_CHAR_ENCODING_UTF16LE);
997
32.0k
    }
998
31.1k
    return(XML_CHAR_ENCODING_NONE);
999
32.9k
}
1000
1001
/**
1002
 * xmlCleanupEncodingAliases:
1003
 *
1004
 * Unregisters all aliases
1005
 */
1006
void
1007
0
xmlCleanupEncodingAliases(void) {
1008
0
    int i;
1009
1010
0
    if (xmlCharEncodingAliases == NULL)
1011
0
  return;
1012
1013
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1014
0
  if (xmlCharEncodingAliases[i].name != NULL)
1015
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1016
0
  if (xmlCharEncodingAliases[i].alias != NULL)
1017
0
      xmlFree((char *) xmlCharEncodingAliases[i].alias);
1018
0
    }
1019
0
    xmlCharEncodingAliasesNb = 0;
1020
0
    xmlCharEncodingAliasesMax = 0;
1021
0
    xmlFree(xmlCharEncodingAliases);
1022
0
    xmlCharEncodingAliases = NULL;
1023
0
}
1024
1025
/**
1026
 * xmlGetEncodingAlias:
1027
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1028
 *
1029
 * Lookup an encoding name for the given alias.
1030
 *
1031
 * Returns NULL if not found, otherwise the original name
1032
 */
1033
const char *
1034
3.22k
xmlGetEncodingAlias(const char *alias) {
1035
3.22k
    int i;
1036
3.22k
    char upper[100];
1037
1038
3.22k
    if (alias == NULL)
1039
0
  return(NULL);
1040
1041
3.22k
    if (xmlCharEncodingAliases == NULL)
1042
3.22k
  return(NULL);
1043
1044
0
    for (i = 0;i < 99;i++) {
1045
0
        upper[i] = toupper(alias[i]);
1046
0
  if (upper[i] == 0) break;
1047
0
    }
1048
0
    upper[i] = 0;
1049
1050
    /*
1051
     * Walk down the list looking for a definition of the alias
1052
     */
1053
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1054
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1055
0
      return(xmlCharEncodingAliases[i].name);
1056
0
  }
1057
0
    }
1058
0
    return(NULL);
1059
0
}
1060
1061
/**
1062
 * xmlAddEncodingAlias:
1063
 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1064
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1065
 *
1066
 * Registers an alias @alias for an encoding named @name. Existing alias
1067
 * will be overwritten.
1068
 *
1069
 * Returns 0 in case of success, -1 in case of error
1070
 */
1071
int
1072
0
xmlAddEncodingAlias(const char *name, const char *alias) {
1073
0
    int i;
1074
0
    char upper[100];
1075
1076
0
    if ((name == NULL) || (alias == NULL))
1077
0
  return(-1);
1078
1079
0
    for (i = 0;i < 99;i++) {
1080
0
        upper[i] = toupper(alias[i]);
1081
0
  if (upper[i] == 0) break;
1082
0
    }
1083
0
    upper[i] = 0;
1084
1085
0
    if (xmlCharEncodingAliases == NULL) {
1086
0
  xmlCharEncodingAliasesNb = 0;
1087
0
  xmlCharEncodingAliasesMax = 20;
1088
0
  xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1089
0
        xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1090
0
  if (xmlCharEncodingAliases == NULL)
1091
0
      return(-1);
1092
0
    } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1093
0
  xmlCharEncodingAliasesMax *= 2;
1094
0
  xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1095
0
        xmlRealloc(xmlCharEncodingAliases,
1096
0
             xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1097
0
    }
1098
    /*
1099
     * Walk down the list looking for a definition of the alias
1100
     */
1101
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1102
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1103
      /*
1104
       * Replace the definition.
1105
       */
1106
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1107
0
      xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1108
0
      return(0);
1109
0
  }
1110
0
    }
1111
    /*
1112
     * Add the definition
1113
     */
1114
0
    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1115
0
    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1116
0
    xmlCharEncodingAliasesNb++;
1117
0
    return(0);
1118
0
}
1119
1120
/**
1121
 * xmlDelEncodingAlias:
1122
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1123
 *
1124
 * Unregisters an encoding alias @alias
1125
 *
1126
 * Returns 0 in case of success, -1 in case of error
1127
 */
1128
int
1129
0
xmlDelEncodingAlias(const char *alias) {
1130
0
    int i;
1131
1132
0
    if (alias == NULL)
1133
0
  return(-1);
1134
1135
0
    if (xmlCharEncodingAliases == NULL)
1136
0
  return(-1);
1137
    /*
1138
     * Walk down the list looking for a definition of the alias
1139
     */
1140
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1141
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1142
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1143
0
      xmlFree((char *) xmlCharEncodingAliases[i].alias);
1144
0
      xmlCharEncodingAliasesNb--;
1145
0
      memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1146
0
        sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1147
0
      return(0);
1148
0
  }
1149
0
    }
1150
0
    return(-1);
1151
0
}
1152
1153
/**
1154
 * xmlParseCharEncoding:
1155
 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1156
 *
1157
 * Compare the string to the encoding schemes already known. Note
1158
 * that the comparison is case insensitive accordingly to the section
1159
 * [XML] 4.3.3 Character Encoding in Entities.
1160
 *
1161
 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1162
 * if not recognized.
1163
 */
1164
xmlCharEncoding
1165
xmlParseCharEncoding(const char* name)
1166
8.49k
{
1167
8.49k
    const char *alias;
1168
8.49k
    char upper[500];
1169
8.49k
    int i;
1170
1171
8.49k
    if (name == NULL)
1172
7.77k
  return(XML_CHAR_ENCODING_NONE);
1173
1174
    /*
1175
     * Do the alias resolution
1176
     */
1177
721
    alias = xmlGetEncodingAlias(name);
1178
721
    if (alias != NULL)
1179
0
  name = alias;
1180
1181
9.80k
    for (i = 0;i < 499;i++) {
1182
9.80k
        upper[i] = toupper(name[i]);
1183
9.80k
  if (upper[i] == 0) break;
1184
9.80k
    }
1185
721
    upper[i] = 0;
1186
1187
721
    if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1188
721
    if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1189
690
    if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1190
1191
    /*
1192
     * NOTE: if we were able to parse this, the endianness of UTF16 is
1193
     *       already found and in use
1194
     */
1195
690
    if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1196
638
    if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1197
1198
638
    if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1199
638
    if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1200
638
    if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1201
1202
    /*
1203
     * NOTE: if we were able to parse this, the endianness of UCS4 is
1204
     *       already found and in use
1205
     */
1206
638
    if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1207
290
    if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1208
290
    if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1209
1210
1211
290
    if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1212
230
    if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1213
230
    if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1214
1215
230
    if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1216
230
    if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1217
230
    if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1218
1219
230
    if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1220
230
    if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1221
230
    if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1222
230
    if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1223
230
    if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1224
230
    if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1225
230
    if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1226
1227
230
    if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1228
230
    if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1229
230
    if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1230
1231
#ifdef DEBUG_ENCODING
1232
    xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1233
#endif
1234
102
    return(XML_CHAR_ENCODING_ERROR);
1235
230
}
1236
1237
/**
1238
 * xmlGetCharEncodingName:
1239
 * @enc:  the encoding
1240
 *
1241
 * The "canonical" name for XML encoding.
1242
 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1243
 * Section 4.3.3  Character Encoding in Entities
1244
 *
1245
 * Returns the canonical name for the given encoding
1246
 */
1247
1248
const char*
1249
372
xmlGetCharEncodingName(xmlCharEncoding enc) {
1250
372
    switch (enc) {
1251
0
        case XML_CHAR_ENCODING_ERROR:
1252
0
      return(NULL);
1253
0
        case XML_CHAR_ENCODING_NONE:
1254
0
      return(NULL);
1255
0
        case XML_CHAR_ENCODING_UTF8:
1256
0
      return("UTF-8");
1257
0
        case XML_CHAR_ENCODING_UTF16LE:
1258
0
      return("UTF-16");
1259
0
        case XML_CHAR_ENCODING_UTF16BE:
1260
0
      return("UTF-16");
1261
0
        case XML_CHAR_ENCODING_EBCDIC:
1262
0
            return("EBCDIC");
1263
348
        case XML_CHAR_ENCODING_UCS4LE:
1264
348
            return("ISO-10646-UCS-4");
1265
0
        case XML_CHAR_ENCODING_UCS4BE:
1266
0
            return("ISO-10646-UCS-4");
1267
9
        case XML_CHAR_ENCODING_UCS4_2143:
1268
9
            return("ISO-10646-UCS-4");
1269
15
        case XML_CHAR_ENCODING_UCS4_3412:
1270
15
            return("ISO-10646-UCS-4");
1271
0
        case XML_CHAR_ENCODING_UCS2:
1272
0
            return("ISO-10646-UCS-2");
1273
0
        case XML_CHAR_ENCODING_8859_1:
1274
0
      return("ISO-8859-1");
1275
0
        case XML_CHAR_ENCODING_8859_2:
1276
0
      return("ISO-8859-2");
1277
0
        case XML_CHAR_ENCODING_8859_3:
1278
0
      return("ISO-8859-3");
1279
0
        case XML_CHAR_ENCODING_8859_4:
1280
0
      return("ISO-8859-4");
1281
0
        case XML_CHAR_ENCODING_8859_5:
1282
0
      return("ISO-8859-5");
1283
0
        case XML_CHAR_ENCODING_8859_6:
1284
0
      return("ISO-8859-6");
1285
0
        case XML_CHAR_ENCODING_8859_7:
1286
0
      return("ISO-8859-7");
1287
0
        case XML_CHAR_ENCODING_8859_8:
1288
0
      return("ISO-8859-8");
1289
0
        case XML_CHAR_ENCODING_8859_9:
1290
0
      return("ISO-8859-9");
1291
0
        case XML_CHAR_ENCODING_2022_JP:
1292
0
            return("ISO-2022-JP");
1293
0
        case XML_CHAR_ENCODING_SHIFT_JIS:
1294
0
            return("Shift-JIS");
1295
0
        case XML_CHAR_ENCODING_EUC_JP:
1296
0
            return("EUC-JP");
1297
0
  case XML_CHAR_ENCODING_ASCII:
1298
0
      return(NULL);
1299
372
    }
1300
0
    return(NULL);
1301
372
}
1302
1303
/************************************************************************
1304
 *                  *
1305
 *      Char encoding handlers        *
1306
 *                  *
1307
 ************************************************************************/
1308
1309
#if defined(LIBXML_ISO8859X_ENABLED) && \
    !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)

/*
 * Forward declarations of the built-in ISO-8859-<part> converters, used
 * only when neither iconv nor ICU is enabled. Each expansion declares one
 * ISO8859_<part>ToUTF8 / UTF8ToISO8859_<part> pair.
 */
#define DECLARE_ISO_FUNCS(part) \
    static int ISO8859_##part##ToUTF8(unsigned char* out, int *outlen, \
                                      const unsigned char* in, int *inlen); \
    static int UTF8ToISO8859_##part(unsigned char* out, int *outlen, \
                                    const unsigned char* in, int *inlen);

/** DOC_DISABLE */
DECLARE_ISO_FUNCS(2)
DECLARE_ISO_FUNCS(3)
DECLARE_ISO_FUNCS(4)
DECLARE_ISO_FUNCS(5)
DECLARE_ISO_FUNCS(6)
DECLARE_ISO_FUNCS(7)
DECLARE_ISO_FUNCS(8)
DECLARE_ISO_FUNCS(9)
DECLARE_ISO_FUNCS(10)
DECLARE_ISO_FUNCS(11)
DECLARE_ISO_FUNCS(13)
DECLARE_ISO_FUNCS(14)
DECLARE_ISO_FUNCS(15)
DECLARE_ISO_FUNCS(16)
/** DOC_ENABLE */

#endif /* LIBXML_ISO8859X_ENABLED */
1336
1337
#ifdef LIBXML_ICONV_ENABLED
1338
  #define EMPTY_ICONV , (iconv_t) 0, (iconv_t) 0
1339
#else
1340
  #define EMPTY_ICONV
1341
#endif
1342
1343
#ifdef LIBXML_UCONV_ENABLED
1344
  #define EMPTY_UCONV , NULL, NULL
1345
#else
1346
  #define EMPTY_UCONV
1347
#endif
1348
1349
#define MAKE_HANDLER(name, in, out) \
1350
    { (char *) name, in, out EMPTY_ICONV EMPTY_UCONV }
1351
1352
static const xmlCharEncodingHandler defaultHandlers[] = {
1353
    MAKE_HANDLER("UTF-8", UTF8ToUTF8, UTF8ToUTF8)
1354
#ifdef LIBXML_OUTPUT_ENABLED
1355
    ,MAKE_HANDLER("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE)
1356
    ,MAKE_HANDLER("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE)
1357
    ,MAKE_HANDLER("UTF-16", UTF16LEToUTF8, UTF8ToUTF16)
1358
    ,MAKE_HANDLER("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1)
1359
    ,MAKE_HANDLER("ASCII", asciiToUTF8, UTF8Toascii)
1360
    ,MAKE_HANDLER("US-ASCII", asciiToUTF8, UTF8Toascii)
1361
#ifdef LIBXML_HTML_ENABLED
1362
    ,MAKE_HANDLER("HTML", NULL, UTF8ToHtml)
1363
#endif
1364
#else
1365
    ,MAKE_HANDLER("UTF-16LE", UTF16LEToUTF8, NULL)
1366
    ,MAKE_HANDLER("UTF-16BE", UTF16BEToUTF8, NULL)
1367
    ,MAKE_HANDLER("UTF-16", UTF16LEToUTF8, NULL)
1368
    ,MAKE_HANDLER("ISO-8859-1", isolat1ToUTF8, NULL)
1369
    ,MAKE_HANDLER("ASCII", asciiToUTF8, NULL)
1370
    ,MAKE_HANDLER("US-ASCII", asciiToUTF8, NULL)
1371
#endif /* LIBXML_OUTPUT_ENABLED */
1372
1373
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) && \
1374
    defined(LIBXML_ISO8859X_ENABLED)
1375
    ,MAKE_HANDLER("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2)
1376
    ,MAKE_HANDLER("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3)
1377
    ,MAKE_HANDLER("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4)
1378
    ,MAKE_HANDLER("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5)
1379
    ,MAKE_HANDLER("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6)
1380
    ,MAKE_HANDLER("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7)
1381
    ,MAKE_HANDLER("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8)
1382
    ,MAKE_HANDLER("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9)
1383
    ,MAKE_HANDLER("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10)
1384
    ,MAKE_HANDLER("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11)
1385
    ,MAKE_HANDLER("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13)
1386
    ,MAKE_HANDLER("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14)
1387
    ,MAKE_HANDLER("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15)
1388
    ,MAKE_HANDLER("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16)
1389
#endif
1390
};
1391
1392
#define NUM_DEFAULT_HANDLERS \
1393
39.7k
    (sizeof(defaultHandlers) / sizeof(defaultHandlers[0]))
1394
1395
static const xmlCharEncodingHandler *xmlUTF16LEHandler = &defaultHandlers[1];
1396
static const xmlCharEncodingHandler *xmlUTF16BEHandler = &defaultHandlers[2];
1397
1398
/* the size should be growable, but it's not a big deal ... */
1399
0
#define MAX_ENCODING_HANDLERS 50
1400
static xmlCharEncodingHandlerPtr *handlers = NULL;
1401
static int nbCharEncodingHandler = 0;
1402
1403
/**
1404
 * xmlNewCharEncodingHandler:
1405
 * @name:  the encoding name, in UTF-8 format (ASCII actually)
1406
 * @input:  the xmlCharEncodingInputFunc to read that encoding
1407
 * @output:  the xmlCharEncodingOutputFunc to write that encoding
1408
 *
1409
 * Create and registers an xmlCharEncodingHandler.
1410
 *
1411
 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1412
 */
1413
xmlCharEncodingHandlerPtr
1414
xmlNewCharEncodingHandler(const char *name,
1415
                          xmlCharEncodingInputFunc input,
1416
0
                          xmlCharEncodingOutputFunc output) {
1417
0
    xmlCharEncodingHandlerPtr handler;
1418
0
    const char *alias;
1419
0
    char upper[500];
1420
0
    int i;
1421
0
    char *up = NULL;
1422
1423
    /*
1424
     * Do the alias resolution
1425
     */
1426
0
    alias = xmlGetEncodingAlias(name);
1427
0
    if (alias != NULL)
1428
0
  name = alias;
1429
1430
    /*
1431
     * Keep only the uppercase version of the encoding.
1432
     */
1433
0
    if (name == NULL) {
1434
0
        xmlEncodingErr(XML_I18N_NO_NAME,
1435
0
           "xmlNewCharEncodingHandler : no name !\n", NULL);
1436
0
  return(NULL);
1437
0
    }
1438
0
    for (i = 0;i < 499;i++) {
1439
0
        upper[i] = toupper(name[i]);
1440
0
  if (upper[i] == 0) break;
1441
0
    }
1442
0
    upper[i] = 0;
1443
0
    up = xmlMemStrdup(upper);
1444
0
    if (up == NULL) {
1445
0
        xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1446
0
  return(NULL);
1447
0
    }
1448
1449
    /*
1450
     * allocate and fill-up an handler block.
1451
     */
1452
0
    handler = (xmlCharEncodingHandlerPtr)
1453
0
              xmlMalloc(sizeof(xmlCharEncodingHandler));
1454
0
    if (handler == NULL) {
1455
0
        xmlFree(up);
1456
0
        xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1457
0
  return(NULL);
1458
0
    }
1459
0
    memset(handler, 0, sizeof(xmlCharEncodingHandler));
1460
0
    handler->input = input;
1461
0
    handler->output = output;
1462
0
    handler->name = up;
1463
1464
0
#ifdef LIBXML_ICONV_ENABLED
1465
0
    handler->iconv_in = NULL;
1466
0
    handler->iconv_out = NULL;
1467
0
#endif
1468
#ifdef LIBXML_ICU_ENABLED
1469
    handler->uconv_in = NULL;
1470
    handler->uconv_out = NULL;
1471
#endif
1472
1473
    /*
1474
     * registers and returns the handler.
1475
     */
1476
0
    xmlRegisterCharEncodingHandler(handler);
1477
#ifdef DEBUG_ENCODING
1478
    xmlGenericError(xmlGenericErrorContext,
1479
      "Registered encoding handler for %s\n", name);
1480
#endif
1481
0
    return(handler);
1482
0
}
1483
1484
/**
 * xmlInitCharEncodingHandlers:
 *
 * DEPRECATED: Alias for xmlInitParser.
 *
 * Kept as a thin wrapper; it only forwards to xmlInitParser().
 */
void
xmlInitCharEncodingHandlers(void)
{
    xmlInitParser();
}
1493
1494
/**
1495
 * xmlInitEncodingInternal:
1496
 *
1497
 * Initialize the char encoding support.
1498
 */
1499
void
1500
12
xmlInitEncodingInternal(void) {
1501
12
    unsigned short int tst = 0x1234;
1502
12
    unsigned char *ptr = (unsigned char *) &tst;
1503
1504
12
    if (*ptr == 0x12) xmlLittleEndian = 0;
1505
12
    else if (*ptr == 0x34) xmlLittleEndian = 1;
1506
0
    else {
1507
0
        xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1508
0
                 "Odd problem at endianness detection\n", NULL);
1509
0
    }
1510
12
}
1511
1512
/**
1513
 * xmlCleanupCharEncodingHandlers:
1514
 *
1515
 * DEPRECATED: This function will be made private. Call xmlCleanupParser
1516
 * to free global state but see the warnings there. xmlCleanupParser
1517
 * should be only called once at program exit. In most cases, you don't
1518
 * have call cleanup functions at all.
1519
 *
1520
 * Cleanup the memory allocated for the char encoding support, it
1521
 * unregisters all the encoding handlers and the aliases.
1522
 */
1523
void
1524
0
xmlCleanupCharEncodingHandlers(void) {
1525
0
    xmlCleanupEncodingAliases();
1526
1527
0
    if (handlers == NULL) return;
1528
1529
0
    for (;nbCharEncodingHandler > 0;) {
1530
0
        nbCharEncodingHandler--;
1531
0
  if (handlers[nbCharEncodingHandler] != NULL) {
1532
0
      if (handlers[nbCharEncodingHandler]->name != NULL)
1533
0
    xmlFree(handlers[nbCharEncodingHandler]->name);
1534
0
      xmlFree(handlers[nbCharEncodingHandler]);
1535
0
  }
1536
0
    }
1537
0
    xmlFree(handlers);
1538
0
    handlers = NULL;
1539
0
    nbCharEncodingHandler = 0;
1540
0
}
1541
1542
/**
1543
 * xmlRegisterCharEncodingHandler:
1544
 * @handler:  the xmlCharEncodingHandlerPtr handler block
1545
 *
1546
 * Register the char encoding handler, surprising, isn't it ?
1547
 */
1548
void
1549
0
xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1550
0
    if (handler == NULL) {
1551
0
        xmlEncodingErr(XML_I18N_NO_HANDLER,
1552
0
    "xmlRegisterCharEncodingHandler: NULL handler\n", NULL);
1553
0
        return;
1554
0
    }
1555
0
    if (handlers == NULL) {
1556
0
        handlers = xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(handlers[0]));
1557
0
        if (handlers == NULL) {
1558
0
            xmlEncodingErrMemory("allocating handler table");
1559
0
            goto free_handler;
1560
0
        }
1561
0
    }
1562
1563
0
    if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1564
0
        xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1565
0
  "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1566
0
                 "MAX_ENCODING_HANDLERS");
1567
0
        goto free_handler;
1568
0
    }
1569
0
    handlers[nbCharEncodingHandler++] = handler;
1570
0
    return;
1571
1572
0
free_handler:
1573
0
    if (handler != NULL) {
1574
0
        if (handler->name != NULL) {
1575
0
            xmlFree(handler->name);
1576
0
        }
1577
0
        xmlFree(handler);
1578
0
    }
1579
0
}
1580
1581
/**
1582
 * xmlGetCharEncodingHandler:
1583
 * @enc:  an xmlCharEncoding value.
1584
 *
1585
 * Search in the registered set the handler able to read/write that encoding.
1586
 *
1587
 * Returns the handler or NULL if not found
1588
 */
1589
xmlCharEncodingHandlerPtr
1590
76.1k
xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1591
76.1k
    xmlCharEncodingHandlerPtr handler;
1592
1593
76.1k
    switch (enc) {
1594
0
        case XML_CHAR_ENCODING_ERROR:
1595
0
      return(NULL);
1596
63.9k
        case XML_CHAR_ENCODING_NONE:
1597
63.9k
      return(NULL);
1598
8.68k
        case XML_CHAR_ENCODING_UTF8:
1599
8.68k
      return(NULL);
1600
1.76k
        case XML_CHAR_ENCODING_UTF16LE:
1601
1.76k
      return((xmlCharEncodingHandlerPtr) xmlUTF16LEHandler);
1602
1.29k
        case XML_CHAR_ENCODING_UTF16BE:
1603
1.29k
      return((xmlCharEncodingHandlerPtr) xmlUTF16BEHandler);
1604
8
        case XML_CHAR_ENCODING_EBCDIC:
1605
8
            handler = xmlFindCharEncodingHandler("EBCDIC");
1606
8
            if (handler != NULL) return(handler);
1607
8
            handler = xmlFindCharEncodingHandler("ebcdic");
1608
8
            if (handler != NULL) return(handler);
1609
8
            handler = xmlFindCharEncodingHandler("EBCDIC-US");
1610
8
            if (handler != NULL) return(handler);
1611
0
            handler = xmlFindCharEncodingHandler("IBM-037");
1612
0
            if (handler != NULL) return(handler);
1613
0
      break;
1614
180
        case XML_CHAR_ENCODING_UCS4BE:
1615
180
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1616
180
            if (handler != NULL) return(handler);
1617
180
            handler = xmlFindCharEncodingHandler("UCS-4");
1618
180
            if (handler != NULL) return(handler);
1619
0
            handler = xmlFindCharEncodingHandler("UCS4");
1620
0
            if (handler != NULL) return(handler);
1621
0
      break;
1622
168
        case XML_CHAR_ENCODING_UCS4LE:
1623
168
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1624
168
            if (handler != NULL) return(handler);
1625
168
            handler = xmlFindCharEncodingHandler("UCS-4");
1626
168
            if (handler != NULL) return(handler);
1627
0
            handler = xmlFindCharEncodingHandler("UCS4");
1628
0
            if (handler != NULL) return(handler);
1629
0
      break;
1630
12
        case XML_CHAR_ENCODING_UCS4_2143:
1631
12
      break;
1632
20
        case XML_CHAR_ENCODING_UCS4_3412:
1633
20
      break;
1634
0
        case XML_CHAR_ENCODING_UCS2:
1635
0
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1636
0
            if (handler != NULL) return(handler);
1637
0
            handler = xmlFindCharEncodingHandler("UCS-2");
1638
0
            if (handler != NULL) return(handler);
1639
0
            handler = xmlFindCharEncodingHandler("UCS2");
1640
0
            if (handler != NULL) return(handler);
1641
0
      break;
1642
1643
      /*
1644
       * We used to keep ISO Latin encodings native in the
1645
       * generated data. This led to so many problems that
1646
       * this has been removed. One can still change this
1647
       * back by registering no-ops encoders for those
1648
       */
1649
0
        case XML_CHAR_ENCODING_8859_1:
1650
0
      handler = xmlFindCharEncodingHandler("ISO-8859-1");
1651
0
      if (handler != NULL) return(handler);
1652
0
      break;
1653
0
        case XML_CHAR_ENCODING_8859_2:
1654
0
      handler = xmlFindCharEncodingHandler("ISO-8859-2");
1655
0
      if (handler != NULL) return(handler);
1656
0
      break;
1657
0
        case XML_CHAR_ENCODING_8859_3:
1658
0
      handler = xmlFindCharEncodingHandler("ISO-8859-3");
1659
0
      if (handler != NULL) return(handler);
1660
0
      break;
1661
0
        case XML_CHAR_ENCODING_8859_4:
1662
0
      handler = xmlFindCharEncodingHandler("ISO-8859-4");
1663
0
      if (handler != NULL) return(handler);
1664
0
      break;
1665
0
        case XML_CHAR_ENCODING_8859_5:
1666
0
      handler = xmlFindCharEncodingHandler("ISO-8859-5");
1667
0
      if (handler != NULL) return(handler);
1668
0
      break;
1669
0
        case XML_CHAR_ENCODING_8859_6:
1670
0
      handler = xmlFindCharEncodingHandler("ISO-8859-6");
1671
0
      if (handler != NULL) return(handler);
1672
0
      break;
1673
0
        case XML_CHAR_ENCODING_8859_7:
1674
0
      handler = xmlFindCharEncodingHandler("ISO-8859-7");
1675
0
      if (handler != NULL) return(handler);
1676
0
      break;
1677
0
        case XML_CHAR_ENCODING_8859_8:
1678
0
      handler = xmlFindCharEncodingHandler("ISO-8859-8");
1679
0
      if (handler != NULL) return(handler);
1680
0
      break;
1681
0
        case XML_CHAR_ENCODING_8859_9:
1682
0
      handler = xmlFindCharEncodingHandler("ISO-8859-9");
1683
0
      if (handler != NULL) return(handler);
1684
0
      break;
1685
1686
1687
0
        case XML_CHAR_ENCODING_2022_JP:
1688
0
            handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1689
0
            if (handler != NULL) return(handler);
1690
0
      break;
1691
0
        case XML_CHAR_ENCODING_SHIFT_JIS:
1692
0
            handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1693
0
            if (handler != NULL) return(handler);
1694
0
            handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1695
0
            if (handler != NULL) return(handler);
1696
0
            handler = xmlFindCharEncodingHandler("Shift_JIS");
1697
0
            if (handler != NULL) return(handler);
1698
0
      break;
1699
0
        case XML_CHAR_ENCODING_EUC_JP:
1700
0
            handler = xmlFindCharEncodingHandler("EUC-JP");
1701
0
            if (handler != NULL) return(handler);
1702
0
      break;
1703
0
  default:
1704
0
      break;
1705
76.1k
    }
1706
1707
#ifdef DEBUG_ENCODING
1708
    xmlGenericError(xmlGenericErrorContext,
1709
      "No handler found for encoding %d\n", enc);
1710
#endif
1711
32
    return(NULL);
1712
76.1k
}
1713
1714
/**
1715
 * xmlFindCharEncodingHandler:
1716
 * @name:  a string describing the char encoding.
1717
 *
1718
 * Search in the registered set the handler able to read/write that encoding
1719
 * or create a new one.
1720
 *
1721
 * Returns the handler or NULL if not found
1722
 */
1723
xmlCharEncodingHandlerPtr
1724
2.50k
xmlFindCharEncodingHandler(const char *name) {
1725
2.50k
    const char *nalias;
1726
2.50k
    const char *norig;
1727
2.50k
    xmlCharEncoding alias;
1728
2.50k
#ifdef LIBXML_ICONV_ENABLED
1729
2.50k
    xmlCharEncodingHandlerPtr enc;
1730
2.50k
    iconv_t icv_in, icv_out;
1731
2.50k
#endif /* LIBXML_ICONV_ENABLED */
1732
#ifdef LIBXML_ICU_ENABLED
1733
    xmlCharEncodingHandlerPtr encu;
1734
    uconv_t *ucv_in, *ucv_out;
1735
#endif /* LIBXML_ICU_ENABLED */
1736
2.50k
    char upper[100];
1737
2.50k
    int i;
1738
1739
2.50k
    if (name == NULL) return(NULL);
1740
2.50k
    if (name[0] == 0) return(NULL);
1741
1742
    /*
1743
     * Do the alias resolution
1744
     */
1745
2.50k
    norig = name;
1746
2.50k
    nalias = xmlGetEncodingAlias(name);
1747
2.50k
    if (nalias != NULL)
1748
0
  name = nalias;
1749
1750
    /*
1751
     * Check first for directly registered encoding names
1752
     */
1753
24.8k
    for (i = 0;i < 99;i++) {
1754
24.8k
        upper[i] = toupper(name[i]);
1755
24.8k
  if (upper[i] == 0) break;
1756
24.8k
    }
1757
2.50k
    upper[i] = 0;
1758
1759
19.0k
    for (i = 0; i < (int) NUM_DEFAULT_HANDLERS; i++) {
1760
17.3k
        if (strcmp(upper, defaultHandlers[i].name) == 0)
1761
835
            return((xmlCharEncodingHandlerPtr) &defaultHandlers[i]);
1762
17.3k
    }
1763
1764
1.66k
    if (handlers != NULL) {
1765
0
        for (i = 0;i < nbCharEncodingHandler; i++) {
1766
0
            if (!strcmp(upper, handlers[i]->name)) {
1767
#ifdef DEBUG_ENCODING
1768
                xmlGenericError(xmlGenericErrorContext,
1769
                        "Found registered handler for encoding %s\n", name);
1770
#endif
1771
0
                return(handlers[i]);
1772
0
            }
1773
0
        }
1774
0
    }
1775
1776
1.66k
#ifdef LIBXML_ICONV_ENABLED
1777
    /* check whether iconv can handle this */
1778
1.66k
    icv_in = iconv_open("UTF-8", name);
1779
1.66k
    icv_out = iconv_open(name, "UTF-8");
1780
1.66k
    if (icv_in == (iconv_t) -1) {
1781
445
        icv_in = iconv_open("UTF-8", upper);
1782
445
    }
1783
1.66k
    if (icv_out == (iconv_t) -1) {
1784
445
  icv_out = iconv_open(upper, "UTF-8");
1785
445
    }
1786
1.66k
    if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1787
1.22k
      enc = (xmlCharEncodingHandlerPtr)
1788
1.22k
            xmlMalloc(sizeof(xmlCharEncodingHandler));
1789
1.22k
      if (enc == NULL) {
1790
0
          iconv_close(icv_in);
1791
0
          iconv_close(icv_out);
1792
0
    return(NULL);
1793
0
      }
1794
1.22k
            memset(enc, 0, sizeof(xmlCharEncodingHandler));
1795
1.22k
      enc->name = xmlMemStrdup(name);
1796
1.22k
      enc->input = NULL;
1797
1.22k
      enc->output = NULL;
1798
1.22k
      enc->iconv_in = icv_in;
1799
1.22k
      enc->iconv_out = icv_out;
1800
#ifdef DEBUG_ENCODING
1801
            xmlGenericError(xmlGenericErrorContext,
1802
        "Found iconv handler for encoding %s\n", name);
1803
#endif
1804
1.22k
      return enc;
1805
1.22k
    } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1806
0
      xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1807
0
        "iconv : problems with filters for '%s'\n", name);
1808
0
      if (icv_in != (iconv_t) -1)
1809
0
    iconv_close(icv_in);
1810
0
      else
1811
0
    iconv_close(icv_out);
1812
0
    }
1813
445
#endif /* LIBXML_ICONV_ENABLED */
1814
#ifdef LIBXML_ICU_ENABLED
1815
    /* check whether icu can handle this */
1816
    ucv_in = openIcuConverter(name, 1);
1817
    ucv_out = openIcuConverter(name, 0);
1818
    if (ucv_in != NULL && ucv_out != NULL) {
1819
      encu = (xmlCharEncodingHandlerPtr)
1820
             xmlMalloc(sizeof(xmlCharEncodingHandler));
1821
      if (encu == NULL) {
1822
                closeIcuConverter(ucv_in);
1823
                closeIcuConverter(ucv_out);
1824
    return(NULL);
1825
      }
1826
            memset(encu, 0, sizeof(xmlCharEncodingHandler));
1827
      encu->name = xmlMemStrdup(name);
1828
      encu->input = NULL;
1829
      encu->output = NULL;
1830
      encu->uconv_in = ucv_in;
1831
      encu->uconv_out = ucv_out;
1832
#ifdef DEBUG_ENCODING
1833
            xmlGenericError(xmlGenericErrorContext,
1834
        "Found ICU converter handler for encoding %s\n", name);
1835
#endif
1836
      return encu;
1837
    } else if (ucv_in != NULL || ucv_out != NULL) {
1838
            closeIcuConverter(ucv_in);
1839
            closeIcuConverter(ucv_out);
1840
      xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1841
        "ICU converter : problems with filters for '%s'\n", name);
1842
    }
1843
#endif /* LIBXML_ICU_ENABLED */
1844
1845
#ifdef DEBUG_ENCODING
1846
    xmlGenericError(xmlGenericErrorContext,
1847
      "No handler found for encoding %s\n", name);
1848
#endif
1849
1850
    /*
1851
     * Fallback using the canonical names
1852
     */
1853
445
    alias = xmlParseCharEncoding(norig);
1854
445
    if (alias != XML_CHAR_ENCODING_ERROR) {
1855
348
        const char* canon;
1856
348
        canon = xmlGetCharEncodingName(alias);
1857
348
        if ((canon != NULL) && (strcmp(name, canon))) {
1858
0
      return(xmlFindCharEncodingHandler(canon));
1859
0
        }
1860
348
    }
1861
1862
    /* If "none of the above", give up */
1863
445
    return(NULL);
1864
445
}
1865
1866
/************************************************************************
1867
 *                  *
1868
 *    ICONV based generic conversion functions    *
1869
 *                  *
1870
 ************************************************************************/
1871
1872
#ifdef LIBXML_ICONV_ENABLED
1873
/**
 * xmlIconvWrapper:
 * @cd:   iconv converter data structure
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of input bytes
 * @inlen:  the length of @in
 *
 * Runs a single iconv() call over the input and maps its outcome to the
 * libxml2 conversion return codes.
 *
 * Returns 0 if success, or
 *     -1 by lack of space (or if one of the pointer arguments is NULL), or
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
 *        the result of transformation can't fit into the encoding we want), or
 *     -3 if there the last byte can't form a single output char.
 *
 * After the iconv() call, @inlen holds the number of octets consumed and
 * @outlen the number of octets produced. When an argument is NULL only
 * @outlen is reset to 0 (if available).
 */
static int
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    const char *src = (const char *) in;
    char *dst = (char *) out;
    size_t srcLeft, dstLeft;
    size_t rc;

    if ((in == NULL) || (inlen == NULL) || (out == NULL) || (outlen == NULL)) {
        if (outlen != NULL)
            *outlen = 0;
        return(-1);
    }

    srcLeft = *inlen;
    dstLeft = *outlen;
    /*
     * Some versions take const, other versions take non-const input.
     */
    rc = iconv(cd, (void *) &src, &srcLeft, &dst, &dstLeft);
    /* iconv leaves the remaining byte counts; turn them into used counts */
    *inlen -= srcLeft;
    *outlen -= dstLeft;

    if ((rc != (size_t) -1) && (srcLeft == 0))
        return(0);

#ifdef EILSEQ
    if (errno == EILSEQ)
        return(-2);
#endif
#ifdef E2BIG
    if (errno == E2BIG)
        return(-1);
#endif
    /* EINVAL (truncated input sequence) and anything else */
    return(-3);
}
1933
#endif /* LIBXML_ICONV_ENABLED */
1934
1935
/************************************************************************
1936
 *                  *
1937
 *    ICU based generic conversion functions    *
1938
 *                  *
1939
 ************************************************************************/
1940
1941
#ifdef LIBXML_ICU_ENABLED
1942
/**
1943
 * xmlUconvWrapper:
1944
 * @cd: ICU uconverter data structure
1945
 * @toUnicode : non-zero if toUnicode. 0 otherwise.
1946
 * @out:  a pointer to an array of bytes to store the result
1947
 * @outlen:  the length of @out
1948
 * @in:  a pointer to an array of input bytes
1949
 * @inlen:  the length of @in
1950
 * @flush: if true, indicates end of input
1951
 *
1952
 * Returns 0 if success, or
1953
 *     -1 by lack of space, or
1954
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
1955
 *        the result of transformation can't fit into the encoding we want), or
1956
 *     -3 if there the last byte can't form a single output char.
1957
 *
1958
 * The value of @inlen after return is the number of octets consumed
1959
 *     as the return value is positive, else unpredictable.
1960
 * The value of @outlen after return is the number of octets produced.
1961
 */
1962
static int
1963
xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
1964
                const unsigned char *in, int *inlen, int flush) {
1965
    const char *ucv_in = (const char *) in;
1966
    char *ucv_out = (char *) out;
1967
    UErrorCode err = U_ZERO_ERROR;
1968
1969
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1970
        if (outlen != NULL) *outlen = 0;
1971
        return(-1);
1972
    }
1973
1974
    if (toUnicode) {
1975
        /* encoding => UTF-16 => UTF-8 */
1976
        ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
1977
                       &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1978
                       &cd->pivot_source, &cd->pivot_target,
1979
                       cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1980
    } else {
1981
        /* UTF-8 => UTF-16 => encoding */
1982
        ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
1983
                       &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1984
                       &cd->pivot_source, &cd->pivot_target,
1985
                       cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1986
    }
1987
    *inlen = ucv_in - (const char*) in;
1988
    *outlen = ucv_out - (char *) out;
1989
    if (U_SUCCESS(err)) {
1990
        /* reset pivot buf if this is the last call for input (flush==TRUE) */
1991
        if (flush)
1992
            cd->pivot_source = cd->pivot_target = cd->pivot_buf;
1993
        return 0;
1994
    }
1995
    if (err == U_BUFFER_OVERFLOW_ERROR)
1996
        return -1;
1997
    if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
1998
        return -2;
1999
    return -3;
2000
}
2001
#endif /* LIBXML_ICU_ENABLED */
2002
2003
/************************************************************************
2004
 *                  *
2005
 *    The real API used by libxml for on-the-fly conversion *
2006
 *                  *
2007
 ************************************************************************/
2008
2009
/**
2010
 * xmlEncInputChunk:
2011
 * @handler:  encoding handler
2012
 * @out:  a pointer to an array of bytes to store the result
2013
 * @outlen:  the length of @out
2014
 * @in:  a pointer to an array of input bytes
2015
 * @inlen:  the length of @in
2016
 * @flush:  flush (ICU-related)
2017
 *
2018
 * Returns 0 if success, or
2019
 *     -1 by lack of space, or
2020
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2021
 *        the result of transformation can't fit into the encoding we want), or
2022
 *     -3 if there the last byte can't form a single output char.
2023
 *
2024
 * The value of @inlen after return is the number of octets consumed
2025
 *     as the return value is 0, else unpredictable.
2026
 * The value of @outlen after return is the number of octets produced.
2027
 */
2028
static int
2029
xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2030
35.8k
                 int *outlen, const unsigned char *in, int *inlen, int flush) {
2031
35.8k
    int ret;
2032
35.8k
    (void)flush;
2033
2034
35.8k
    if (handler->input != NULL) {
2035
29.5k
        ret = handler->input(out, outlen, in, inlen);
2036
29.5k
        if (ret > 0)
2037
14.0k
           ret = 0;
2038
29.5k
    }
2039
6.36k
#ifdef LIBXML_ICONV_ENABLED
2040
6.36k
    else if (handler->iconv_in != NULL) {
2041
6.36k
        ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
2042
6.36k
    }
2043
0
#endif /* LIBXML_ICONV_ENABLED */
2044
#ifdef LIBXML_ICU_ENABLED
2045
    else if (handler->uconv_in != NULL) {
2046
        ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen,
2047
                              flush);
2048
    }
2049
#endif /* LIBXML_ICU_ENABLED */
2050
0
    else {
2051
0
        *outlen = 0;
2052
0
        *inlen = 0;
2053
0
        ret = -2;
2054
0
    }
2055
2056
35.8k
    return(ret);
2057
35.8k
}
2058
2059
/**
2060
 * xmlEncOutputChunk:
2061
 * @handler:  encoding handler
2062
 * @out:  a pointer to an array of bytes to store the result
2063
 * @outlen:  the length of @out
2064
 * @in:  a pointer to an array of input bytes
2065
 * @inlen:  the length of @in
2066
 *
2067
 * Returns 0 if success, or
2068
 *     -1 by lack of space, or
2069
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2070
 *        the result of transformation can't fit into the encoding we want), or
2071
 *     -3 if there the last byte can't form a single output char.
2072
 *     -4 if no output function was found.
2073
 *
2074
 * The value of @inlen after return is the number of octets consumed
2075
 *     as the return value is 0, else unpredictable.
2076
 * The value of @outlen after return is the number of octets produced.
2077
 */
2078
static int
2079
xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2080
1.18k
                  int *outlen, const unsigned char *in, int *inlen) {
2081
1.18k
    int ret;
2082
2083
1.18k
    if (handler->output != NULL) {
2084
912
        ret = handler->output(out, outlen, in, inlen);
2085
912
        if (ret > 0)
2086
607
           ret = 0;
2087
912
    }
2088
272
#ifdef LIBXML_ICONV_ENABLED
2089
272
    else if (handler->iconv_out != NULL) {
2090
272
        ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
2091
272
    }
2092
0
#endif /* LIBXML_ICONV_ENABLED */
2093
#ifdef LIBXML_ICU_ENABLED
2094
    else if (handler->uconv_out != NULL) {
2095
        ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen,
2096
                              1);
2097
    }
2098
#endif /* LIBXML_ICU_ENABLED */
2099
0
    else {
2100
0
        *outlen = 0;
2101
0
        *inlen = 0;
2102
0
        ret = -4;
2103
0
    }
2104
2105
1.18k
    return(ret);
2106
1.18k
}
2107
2108
/**
2109
 * xmlCharEncFirstLine:
2110
 * @handler:  char encoding transformation data structure
2111
 * @out:  an xmlBuffer for the output.
2112
 * @in:  an xmlBuffer for the input
2113
 *
2114
 * Front-end for the encoding handler input function, but handle only
2115
 * the very first line, i.e. limit itself to 45 chars.
2116
 *
2117
 * Returns the number of byte written if success, or
2118
 *     -1 general error
2119
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2120
 *        the result of transformation can't fit into the encoding we want), or
2121
 */
2122
int
2123
xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2124
0
                    xmlBufferPtr in) {
2125
0
    int ret;
2126
0
    int written;
2127
0
    int toconv;
2128
2129
0
    if (handler == NULL) return(-1);
2130
0
    if (out == NULL) return(-1);
2131
0
    if (in == NULL) return(-1);
2132
2133
    /* calculate space available */
2134
0
    written = out->size - out->use - 1; /* count '\0' */
2135
0
    toconv = in->use;
2136
    /*
2137
     * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2138
     * 45 chars should be sufficient to reach the end of the encoding
2139
     * declaration without going too far inside the document content.
2140
     * on UTF-16 this means 90bytes, on UCS4 this means 180
2141
     * The actual value depending on guessed encoding is passed as @len
2142
     * if provided
2143
     */
2144
0
    if (toconv > 180)
2145
0
        toconv = 180;
2146
0
    if (toconv * 2 >= written) {
2147
0
        xmlBufferGrow(out, toconv * 2);
2148
0
  written = out->size - out->use - 1;
2149
0
    }
2150
2151
0
    ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2152
0
                           in->content, &toconv, 0);
2153
0
    xmlBufferShrink(in, toconv);
2154
0
    out->use += written;
2155
0
    out->content[out->use] = 0;
2156
0
    if (ret == -1) ret = -3;
2157
2158
#ifdef DEBUG_ENCODING
2159
    switch (ret) {
2160
        case 0:
2161
      xmlGenericError(xmlGenericErrorContext,
2162
        "converted %d bytes to %d bytes of input\n",
2163
              toconv, written);
2164
      break;
2165
        case -1:
2166
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2167
              toconv, written, in->use);
2168
      break;
2169
        case -2:
2170
      xmlGenericError(xmlGenericErrorContext,
2171
        "input conversion failed due to input error\n");
2172
      break;
2173
        case -3:
2174
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2175
              toconv, written, in->use);
2176
      break;
2177
  default:
2178
      xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2179
    }
2180
#endif /* DEBUG_ENCODING */
2181
    /*
2182
     * Ignore when input buffer is not on a boundary
2183
     */
2184
0
    if (ret == -3) ret = 0;
2185
0
    if (ret == -1) ret = 0;
2186
0
    return(written ? written : ret);
2187
0
}
2188
2189
/**
2190
 * xmlCharEncFirstLineInput:
2191
 * @input: a parser input buffer
2192
 * @len:  number of bytes to convert for the first line, or -1
2193
 *
2194
 * Front-end for the encoding handler input function, but handle only
2195
 * the very first line. Point is that this is based on autodetection
2196
 * of the encoding and once that first line is converted we may find
2197
 * out that a different decoder is needed to process the input.
2198
 *
2199
 * Returns the number of byte written if success, or
2200
 *     -1 general error
2201
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2202
 *        the result of transformation can't fit into the encoding we want), or
2203
 */
2204
int
2205
xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
2206
3.27k
{
2207
3.27k
    int ret;
2208
3.27k
    size_t written;
2209
3.27k
    size_t toconv;
2210
3.27k
    int c_in;
2211
3.27k
    int c_out;
2212
3.27k
    xmlBufPtr in;
2213
3.27k
    xmlBufPtr out;
2214
2215
3.27k
    if ((input == NULL) || (input->encoder == NULL) ||
2216
3.27k
        (input->buffer == NULL) || (input->raw == NULL))
2217
0
        return (-1);
2218
3.27k
    out = input->buffer;
2219
3.27k
    in = input->raw;
2220
2221
3.27k
    toconv = xmlBufUse(in);
2222
3.27k
    if (toconv == 0)
2223
0
        return (0);
2224
3.27k
    written = xmlBufAvail(out);
2225
    /*
2226
     * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2227
     * 45 chars should be sufficient to reach the end of the encoding
2228
     * declaration without going too far inside the document content.
2229
     * on UTF-16 this means 90bytes, on UCS4 this means 180
2230
     * The actual value depending on guessed encoding is passed as @len
2231
     * if provided
2232
     */
2233
3.27k
    if (len >= 0) {
2234
1.85k
        if (toconv > (unsigned int) len)
2235
1.19k
            toconv = len;
2236
1.85k
    } else {
2237
1.41k
        if (toconv > 180)
2238
792
            toconv = 180;
2239
1.41k
    }
2240
3.27k
    if (toconv * 2 >= written) {
2241
0
        xmlBufGrow(out, toconv * 2);
2242
0
        written = xmlBufAvail(out);
2243
0
    }
2244
3.27k
    if (written > 360)
2245
3.27k
        written = 360;
2246
2247
3.27k
    c_in = toconv;
2248
3.27k
    c_out = written;
2249
3.27k
    ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2250
3.27k
                           xmlBufContent(in), &c_in, 0);
2251
3.27k
    xmlBufShrink(in, c_in);
2252
3.27k
    xmlBufAddLen(out, c_out);
2253
3.27k
    if (ret == -1)
2254
0
        ret = -3;
2255
2256
3.27k
    switch (ret) {
2257
2.89k
        case 0:
2258
#ifdef DEBUG_ENCODING
2259
            xmlGenericError(xmlGenericErrorContext,
2260
                            "converted %d bytes to %d bytes of input\n",
2261
                            c_in, c_out);
2262
#endif
2263
2.89k
            break;
2264
0
        case -1:
2265
#ifdef DEBUG_ENCODING
2266
            xmlGenericError(xmlGenericErrorContext,
2267
                         "converted %d bytes to %d bytes of input, %d left\n",
2268
                            c_in, c_out, (int)xmlBufUse(in));
2269
#endif
2270
0
            break;
2271
83
        case -3:
2272
#ifdef DEBUG_ENCODING
2273
            xmlGenericError(xmlGenericErrorContext,
2274
                        "converted %d bytes to %d bytes of input, %d left\n",
2275
                            c_in, c_out, (int)xmlBufUse(in));
2276
#endif
2277
83
            break;
2278
298
        case -2: {
2279
298
            char buf[50];
2280
298
            const xmlChar *content = xmlBufContent(in);
2281
2282
298
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2283
298
         content[0], content[1],
2284
298
         content[2], content[3]);
2285
298
      buf[49] = 0;
2286
298
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2287
298
        "input conversion failed due to input error, bytes %s\n",
2288
298
               buf);
2289
298
        }
2290
3.27k
    }
2291
    /*
2292
     * Ignore when input buffer is not on a boundary
2293
     */
2294
3.27k
    if (ret == -3) ret = 0;
2295
3.27k
    if (ret == -1) ret = 0;
2296
3.27k
    return(c_out ? c_out : ret);
2297
3.27k
}
2298
2299
/**
2300
 * xmlCharEncInput:
2301
 * @input: a parser input buffer
2302
 * @flush: try to flush all the raw buffer
2303
 *
2304
 * Generic front-end for the encoding handler on parser input
2305
 *
2306
 * Returns the number of byte written if success, or
2307
 *     -1 general error
2308
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2309
 *        the result of transformation can't fit into the encoding we want), or
2310
 */
2311
int
2312
xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
2313
50.9k
{
2314
50.9k
    int ret;
2315
50.9k
    size_t written;
2316
50.9k
    size_t toconv;
2317
50.9k
    int c_in;
2318
50.9k
    int c_out;
2319
50.9k
    xmlBufPtr in;
2320
50.9k
    xmlBufPtr out;
2321
2322
50.9k
    if ((input == NULL) || (input->encoder == NULL) ||
2323
50.9k
        (input->buffer == NULL) || (input->raw == NULL))
2324
0
        return (-1);
2325
50.9k
    out = input->buffer;
2326
50.9k
    in = input->raw;
2327
2328
50.9k
    toconv = xmlBufUse(in);
2329
50.9k
    if (toconv == 0)
2330
18.3k
        return (0);
2331
32.6k
    if ((toconv > 64 * 1024) && (flush == 0))
2332
0
        toconv = 64 * 1024;
2333
32.6k
    written = xmlBufAvail(out);
2334
32.6k
    if (toconv * 2 >= written) {
2335
279
        if (xmlBufGrow(out, toconv * 2) < 0)
2336
0
            return (-1);
2337
279
        written = xmlBufAvail(out);
2338
279
    }
2339
32.6k
    if ((written > 128 * 1024) && (flush == 0))
2340
0
        written = 128 * 1024;
2341
2342
32.6k
    c_in = toconv;
2343
32.6k
    c_out = written;
2344
32.6k
    ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2345
32.6k
                           xmlBufContent(in), &c_in, flush);
2346
32.6k
    xmlBufShrink(in, c_in);
2347
32.6k
    xmlBufAddLen(out, c_out);
2348
32.6k
    if (ret == -1)
2349
0
        ret = -3;
2350
2351
32.6k
    switch (ret) {
2352
28.6k
        case 0:
2353
#ifdef DEBUG_ENCODING
2354
            xmlGenericError(xmlGenericErrorContext,
2355
                            "converted %d bytes to %d bytes of input\n",
2356
                            c_in, c_out);
2357
#endif
2358
28.6k
            break;
2359
0
        case -1:
2360
#ifdef DEBUG_ENCODING
2361
            xmlGenericError(xmlGenericErrorContext,
2362
                         "converted %d bytes to %d bytes of input, %d left\n",
2363
                            c_in, c_out, (int)xmlBufUse(in));
2364
#endif
2365
0
            break;
2366
2.77k
        case -3:
2367
#ifdef DEBUG_ENCODING
2368
            xmlGenericError(xmlGenericErrorContext,
2369
                        "converted %d bytes to %d bytes of input, %d left\n",
2370
                            c_in, c_out, (int)xmlBufUse(in));
2371
#endif
2372
2.77k
            break;
2373
1.19k
        case -2: {
2374
1.19k
            char buf[50];
2375
1.19k
            const xmlChar *content = xmlBufContent(in);
2376
2377
1.19k
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2378
1.19k
         content[0], content[1],
2379
1.19k
         content[2], content[3]);
2380
1.19k
      buf[49] = 0;
2381
1.19k
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2382
1.19k
        "input conversion failed due to input error, bytes %s\n",
2383
1.19k
               buf);
2384
1.19k
        }
2385
32.6k
    }
2386
    /*
2387
     * Ignore when input buffer is not on a boundary
2388
     */
2389
32.6k
    if (ret == -3)
2390
2.77k
        ret = 0;
2391
32.6k
    return (c_out? c_out : ret);
2392
32.6k
}
2393
2394
/**
2395
 * xmlCharEncInFunc:
2396
 * @handler:  char encoding transformation data structure
2397
 * @out:  an xmlBuffer for the output.
2398
 * @in:  an xmlBuffer for the input
2399
 *
2400
 * Generic front-end for the encoding handler input function
2401
 *
2402
 * Returns the number of byte written if success, or
2403
 *     -1 general error
2404
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2405
 *        the result of transformation can't fit into the encoding we want), or
2406
 */
2407
int
2408
xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2409
                 xmlBufferPtr in)
2410
0
{
2411
0
    int ret;
2412
0
    int written;
2413
0
    int toconv;
2414
2415
0
    if (handler == NULL)
2416
0
        return (-1);
2417
0
    if (out == NULL)
2418
0
        return (-1);
2419
0
    if (in == NULL)
2420
0
        return (-1);
2421
2422
0
    toconv = in->use;
2423
0
    if (toconv == 0)
2424
0
        return (0);
2425
0
    written = out->size - out->use -1; /* count '\0' */
2426
0
    if (toconv * 2 >= written) {
2427
0
        xmlBufferGrow(out, out->size + toconv * 2);
2428
0
        written = out->size - out->use - 1;
2429
0
    }
2430
0
    ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2431
0
                           in->content, &toconv, 1);
2432
0
    xmlBufferShrink(in, toconv);
2433
0
    out->use += written;
2434
0
    out->content[out->use] = 0;
2435
0
    if (ret == -1)
2436
0
        ret = -3;
2437
2438
0
    switch (ret) {
2439
0
        case 0:
2440
#ifdef DEBUG_ENCODING
2441
            xmlGenericError(xmlGenericErrorContext,
2442
                            "converted %d bytes to %d bytes of input\n",
2443
                            toconv, written);
2444
#endif
2445
0
            break;
2446
0
        case -1:
2447
#ifdef DEBUG_ENCODING
2448
            xmlGenericError(xmlGenericErrorContext,
2449
                         "converted %d bytes to %d bytes of input, %d left\n",
2450
                            toconv, written, in->use);
2451
#endif
2452
0
            break;
2453
0
        case -3:
2454
#ifdef DEBUG_ENCODING
2455
            xmlGenericError(xmlGenericErrorContext,
2456
                        "converted %d bytes to %d bytes of input, %d left\n",
2457
                            toconv, written, in->use);
2458
#endif
2459
0
            break;
2460
0
        case -2: {
2461
0
            char buf[50];
2462
2463
0
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2464
0
         in->content[0], in->content[1],
2465
0
         in->content[2], in->content[3]);
2466
0
      buf[49] = 0;
2467
0
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2468
0
        "input conversion failed due to input error, bytes %s\n",
2469
0
               buf);
2470
0
        }
2471
0
    }
2472
    /*
2473
     * Ignore when input buffer is not on a boundary
2474
     */
2475
0
    if (ret == -3)
2476
0
        ret = 0;
2477
0
    return (written? written : ret);
2478
0
}
2479
2480
#ifdef LIBXML_OUTPUT_ENABLED
2481
/**
2482
 * xmlCharEncOutput:
2483
 * @output: a parser output buffer
2484
 * @init: is this an initialization call without data
2485
 *
2486
 * Generic front-end for the encoding handler on parser output
2487
 * a first call with @init == 1 has to be made first to initiate the
2488
 * output in case of non-stateless encoding needing to initiate their
2489
 * state or the output (like the BOM in UTF16).
2490
 * In case of UTF8 sequence conversion errors for the given encoder,
2491
 * the content will be automatically remapped to a CharRef sequence.
2492
 *
2493
 * Returns the number of byte written if success, or
2494
 *     -1 general error
2495
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2496
 *        the result of transformation can't fit into the encoding we want), or
2497
 */
2498
int
2499
xmlCharEncOutput(xmlOutputBufferPtr output, int init)
2500
1.11k
{
2501
1.11k
    int ret;
2502
1.11k
    size_t written;
2503
1.11k
    int writtentot = 0;
2504
1.11k
    size_t toconv;
2505
1.11k
    int c_in;
2506
1.11k
    int c_out;
2507
1.11k
    xmlBufPtr in;
2508
1.11k
    xmlBufPtr out;
2509
2510
1.11k
    if ((output == NULL) || (output->encoder == NULL) ||
2511
1.11k
        (output->buffer == NULL) || (output->conv == NULL))
2512
0
        return (-1);
2513
1.11k
    out = output->conv;
2514
1.11k
    in = output->buffer;
2515
2516
1.27k
retry:
2517
2518
1.27k
    written = xmlBufAvail(out);
2519
2520
    /*
2521
     * First specific handling of the initialization call
2522
     */
2523
1.27k
    if (init) {
2524
276
        c_in = 0;
2525
276
        c_out = written;
2526
        /* TODO: Check return value. */
2527
276
        xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2528
276
                          NULL, &c_in);
2529
276
        xmlBufAddLen(out, c_out);
2530
#ifdef DEBUG_ENCODING
2531
  xmlGenericError(xmlGenericErrorContext,
2532
    "initialized encoder\n");
2533
#endif
2534
276
        return(c_out);
2535
276
    }
2536
2537
    /*
2538
     * Conversion itself.
2539
     */
2540
996
    toconv = xmlBufUse(in);
2541
996
    if (toconv == 0)
2542
246
        return (writtentot);
2543
750
    if (toconv > 64 * 1024)
2544
0
        toconv = 64 * 1024;
2545
750
    if (toconv * 4 >= written) {
2546
70
        xmlBufGrow(out, toconv * 4);
2547
70
        written = xmlBufAvail(out);
2548
70
    }
2549
750
    if (written > 256 * 1024)
2550
6
        written = 256 * 1024;
2551
2552
750
    c_in = toconv;
2553
750
    c_out = written;
2554
750
    ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2555
750
                            xmlBufContent(in), &c_in);
2556
750
    xmlBufShrink(in, c_in);
2557
750
    xmlBufAddLen(out, c_out);
2558
750
    writtentot += c_out;
2559
750
    if (ret == -1) {
2560
0
        if (c_out > 0) {
2561
            /* Can be a limitation of iconv or uconv */
2562
0
            goto retry;
2563
0
        }
2564
0
        ret = -3;
2565
0
    }
2566
2567
    /*
2568
     * Attempt to handle error cases
2569
     */
2570
750
    switch (ret) {
2571
534
        case 0:
2572
#ifdef DEBUG_ENCODING
2573
      xmlGenericError(xmlGenericErrorContext,
2574
        "converted %d bytes to %d bytes of output\n",
2575
              c_in, c_out);
2576
#endif
2577
534
      break;
2578
0
        case -1:
2579
#ifdef DEBUG_ENCODING
2580
      xmlGenericError(xmlGenericErrorContext,
2581
        "output conversion failed by lack of space\n");
2582
#endif
2583
0
      break;
2584
0
        case -3:
2585
#ifdef DEBUG_ENCODING
2586
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2587
              c_in, c_out, (int) xmlBufUse(in));
2588
#endif
2589
0
      break;
2590
0
        case -4:
2591
0
            xmlEncodingErr(XML_I18N_NO_OUTPUT,
2592
0
                           "xmlCharEncOutFunc: no output function !\n", NULL);
2593
0
            ret = -1;
2594
0
            break;
2595
216
        case -2: {
2596
216
      xmlChar charref[20];
2597
216
      int len = xmlBufUse(in);
2598
216
            xmlChar *content = xmlBufContent(in);
2599
216
      int cur, charrefLen;
2600
2601
216
      cur = xmlGetUTF8Char(content, &len);
2602
216
      if (cur <= 0)
2603
58
                break;
2604
2605
#ifdef DEBUG_ENCODING
2606
            xmlGenericError(xmlGenericErrorContext,
2607
                    "handling output conversion error\n");
2608
            xmlGenericError(xmlGenericErrorContext,
2609
                    "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2610
                    content[0], content[1],
2611
                    content[2], content[3]);
2612
#endif
2613
            /*
2614
             * Removes the UTF8 sequence, and replace it by a charref
2615
             * and continue the transcoding phase, hoping the error
2616
             * did not mangle the encoder state.
2617
             */
2618
158
            charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2619
158
                             "&#%d;", cur);
2620
158
            xmlBufShrink(in, len);
2621
158
            xmlBufGrow(out, charrefLen * 4);
2622
158
            c_out = xmlBufAvail(out);
2623
158
            c_in = charrefLen;
2624
158
            ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2625
158
                                    charref, &c_in);
2626
2627
158
      if ((ret < 0) || (c_in != charrefLen)) {
2628
0
    char buf[50];
2629
2630
0
    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2631
0
       content[0], content[1],
2632
0
       content[2], content[3]);
2633
0
    buf[49] = 0;
2634
0
    xmlEncodingErr(XML_I18N_CONV_FAILED,
2635
0
        "output conversion failed due to conv error, bytes %s\n",
2636
0
             buf);
2637
0
    content[0] = ' ';
2638
0
                break;
2639
0
      }
2640
2641
158
            xmlBufAddLen(out, c_out);
2642
158
            writtentot += c_out;
2643
158
            goto retry;
2644
158
  }
2645
750
    }
2646
592
    return(writtentot ? writtentot : ret);
2647
750
}
2648
#endif
2649
2650
/**
2651
 * xmlCharEncOutFunc:
2652
 * @handler:  char encoding transformation data structure
2653
 * @out:  an xmlBuffer for the output.
2654
 * @in:  an xmlBuffer for the input
2655
 *
2656
 * Generic front-end for the encoding handler output function
2657
 * a first call with @in == NULL has to be made firs to initiate the
2658
 * output in case of non-stateless encoding needing to initiate their
2659
 * state or the output (like the BOM in UTF16).
2660
 * In case of UTF8 sequence conversion errors for the given encoder,
2661
 * the content will be automatically remapped to a CharRef sequence.
2662
 *
2663
 * Returns the number of byte written if success, or
2664
 *     -1 general error
2665
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2666
 *        the result of transformation can't fit into the encoding we want), or
2667
 */
2668
int
2669
xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2670
0
                  xmlBufferPtr in) {
2671
0
    int ret;
2672
0
    int written;
2673
0
    int writtentot = 0;
2674
0
    int toconv;
2675
2676
0
    if (handler == NULL) return(-1);
2677
0
    if (out == NULL) return(-1);
2678
2679
0
retry:
2680
2681
0
    written = out->size - out->use;
2682
2683
0
    if (written > 0)
2684
0
  written--; /* Gennady: count '/0' */
2685
2686
    /*
2687
     * First specific handling of in = NULL, i.e. the initialization call
2688
     */
2689
0
    if (in == NULL) {
2690
0
        toconv = 0;
2691
        /* TODO: Check return value. */
2692
0
        xmlEncOutputChunk(handler, &out->content[out->use], &written,
2693
0
                          NULL, &toconv);
2694
0
        out->use += written;
2695
0
        out->content[out->use] = 0;
2696
#ifdef DEBUG_ENCODING
2697
  xmlGenericError(xmlGenericErrorContext,
2698
    "initialized encoder\n");
2699
#endif
2700
0
        return(0);
2701
0
    }
2702
2703
    /*
2704
     * Conversion itself.
2705
     */
2706
0
    toconv = in->use;
2707
0
    if (toconv == 0)
2708
0
  return(0);
2709
0
    if (toconv * 4 >= written) {
2710
0
        xmlBufferGrow(out, toconv * 4);
2711
0
  written = out->size - out->use - 1;
2712
0
    }
2713
0
    ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2714
0
                            in->content, &toconv);
2715
0
    xmlBufferShrink(in, toconv);
2716
0
    out->use += written;
2717
0
    writtentot += written;
2718
0
    out->content[out->use] = 0;
2719
0
    if (ret == -1) {
2720
0
        if (written > 0) {
2721
            /* Can be a limitation of iconv or uconv */
2722
0
            goto retry;
2723
0
        }
2724
0
        ret = -3;
2725
0
    }
2726
2727
    /*
2728
     * Attempt to handle error cases
2729
     */
2730
0
    switch (ret) {
2731
0
        case 0:
2732
#ifdef DEBUG_ENCODING
2733
      xmlGenericError(xmlGenericErrorContext,
2734
        "converted %d bytes to %d bytes of output\n",
2735
              toconv, written);
2736
#endif
2737
0
      break;
2738
0
        case -1:
2739
#ifdef DEBUG_ENCODING
2740
      xmlGenericError(xmlGenericErrorContext,
2741
        "output conversion failed by lack of space\n");
2742
#endif
2743
0
      break;
2744
0
        case -3:
2745
#ifdef DEBUG_ENCODING
2746
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2747
              toconv, written, in->use);
2748
#endif
2749
0
      break;
2750
0
        case -4:
2751
0
      xmlEncodingErr(XML_I18N_NO_OUTPUT,
2752
0
               "xmlCharEncOutFunc: no output function !\n", NULL);
2753
0
      ret = -1;
2754
0
            break;
2755
0
        case -2: {
2756
0
      xmlChar charref[20];
2757
0
      int len = in->use;
2758
0
      const xmlChar *utf = (const xmlChar *) in->content;
2759
0
      int cur, charrefLen;
2760
2761
0
      cur = xmlGetUTF8Char(utf, &len);
2762
0
      if (cur <= 0)
2763
0
                break;
2764
2765
#ifdef DEBUG_ENCODING
2766
            xmlGenericError(xmlGenericErrorContext,
2767
                    "handling output conversion error\n");
2768
            xmlGenericError(xmlGenericErrorContext,
2769
                    "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2770
                    in->content[0], in->content[1],
2771
                    in->content[2], in->content[3]);
2772
#endif
2773
            /*
2774
             * Removes the UTF8 sequence, and replace it by a charref
2775
             * and continue the transcoding phase, hoping the error
2776
             * did not mangle the encoder state.
2777
             */
2778
0
            charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2779
0
                             "&#%d;", cur);
2780
0
            xmlBufferShrink(in, len);
2781
0
            xmlBufferGrow(out, charrefLen * 4);
2782
0
      written = out->size - out->use - 1;
2783
0
            toconv = charrefLen;
2784
0
            ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2785
0
                                    charref, &toconv);
2786
2787
0
      if ((ret < 0) || (toconv != charrefLen)) {
2788
0
    char buf[50];
2789
2790
0
    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2791
0
       in->content[0], in->content[1],
2792
0
       in->content[2], in->content[3]);
2793
0
    buf[49] = 0;
2794
0
    xmlEncodingErr(XML_I18N_CONV_FAILED,
2795
0
        "output conversion failed due to conv error, bytes %s\n",
2796
0
             buf);
2797
0
    in->content[0] = ' ';
2798
0
          break;
2799
0
      }
2800
2801
0
            out->use += written;
2802
0
            writtentot += written;
2803
0
            out->content[out->use] = 0;
2804
0
            goto retry;
2805
0
  }
2806
0
    }
2807
0
    return(writtentot ? writtentot : ret);
2808
0
}
2809
2810
/**
2811
 * xmlCharEncCloseFunc:
2812
 * @handler:  char encoding transformation data structure
2813
 *
2814
 * Generic front-end for encoding handler close function
2815
 *
2816
 * Returns 0 if success, or -1 in case of error
2817
 */
2818
int
2819
4.42k
xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2820
4.42k
    int ret = 0;
2821
4.42k
    int tofree = 0;
2822
4.42k
    int i = 0;
2823
2824
4.42k
    if (handler == NULL) return(-1);
2825
4.42k
    if (handler->name == NULL) return(-1);
2826
2827
20.7k
    for (i = 0; i < (int) NUM_DEFAULT_HANDLERS; i++) {
2828
19.5k
        if (handler == &defaultHandlers[i])
2829
3.20k
            return(0);
2830
19.5k
    }
2831
2832
1.22k
    if (handlers != NULL) {
2833
0
        for (i = 0;i < nbCharEncodingHandler; i++) {
2834
0
            if (handler == handlers[i])
2835
0
                return(0);
2836
0
  }
2837
0
    }
2838
1.22k
#ifdef LIBXML_ICONV_ENABLED
2839
    /*
2840
     * Iconv handlers can be used only once, free the whole block.
2841
     * and the associated icon resources.
2842
     */
2843
1.22k
    if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
2844
1.22k
        tofree = 1;
2845
1.22k
  if (handler->iconv_out != NULL) {
2846
1.22k
      if (iconv_close(handler->iconv_out))
2847
0
    ret = -1;
2848
1.22k
      handler->iconv_out = NULL;
2849
1.22k
  }
2850
1.22k
  if (handler->iconv_in != NULL) {
2851
1.22k
      if (iconv_close(handler->iconv_in))
2852
0
    ret = -1;
2853
1.22k
      handler->iconv_in = NULL;
2854
1.22k
  }
2855
1.22k
    }
2856
1.22k
#endif /* LIBXML_ICONV_ENABLED */
2857
#ifdef LIBXML_ICU_ENABLED
2858
    if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) {
2859
        tofree = 1;
2860
  if (handler->uconv_out != NULL) {
2861
      closeIcuConverter(handler->uconv_out);
2862
      handler->uconv_out = NULL;
2863
  }
2864
  if (handler->uconv_in != NULL) {
2865
      closeIcuConverter(handler->uconv_in);
2866
      handler->uconv_in = NULL;
2867
  }
2868
    }
2869
#endif
2870
1.22k
    if (tofree) {
2871
        /* free up only dynamic handlers iconv/uconv */
2872
1.22k
        if (handler->name != NULL)
2873
1.22k
            xmlFree(handler->name);
2874
1.22k
        handler->name = NULL;
2875
1.22k
        xmlFree(handler);
2876
1.22k
    }
2877
#ifdef DEBUG_ENCODING
2878
    if (ret)
2879
        xmlGenericError(xmlGenericErrorContext,
2880
    "failed to close the encoding handler\n");
2881
    else
2882
        xmlGenericError(xmlGenericErrorContext,
2883
    "closed the encoding handler\n");
2884
#endif
2885
2886
1.22k
    return(ret);
2887
1.22k
}
2888
2889
/**
2890
 * xmlByteConsumed:
2891
 * @ctxt: an XML parser context
2892
 *
2893
 * This function provides the current index of the parser relative
2894
 * to the start of the current entity. This function is computed in
2895
 * bytes from the beginning starting at zero and finishing at the
2896
 * size in byte of the file if parsing a file. The function is
2897
 * of constant cost if the input is UTF-8 but can be costly if run
2898
 * on non-UTF-8 input.
2899
 *
2900
 * Returns the index in bytes from the beginning of the entity or -1
2901
 *         in case the index could not be computed.
2902
 */
2903
long
2904
0
xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2905
0
    xmlParserInputPtr in;
2906
2907
0
    if (ctxt == NULL) return(-1);
2908
0
    in = ctxt->input;
2909
0
    if (in == NULL)  return(-1);
2910
0
    if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2911
0
        unsigned int unused = 0;
2912
0
  xmlCharEncodingHandler * handler = in->buf->encoder;
2913
        /*
2914
   * Encoding conversion, compute the number of unused original
2915
   * bytes from the input not consumed and subtract that from
2916
   * the raw consumed value, this is not a cheap operation
2917
   */
2918
0
        if (in->end - in->cur > 0) {
2919
0
      unsigned char convbuf[32000];
2920
0
      const unsigned char *cur = (const unsigned char *)in->cur;
2921
0
      int toconv = in->end - in->cur, written = 32000;
2922
2923
0
      int ret;
2924
2925
0
            do {
2926
0
                toconv = in->end - cur;
2927
0
                written = 32000;
2928
0
                ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2929
0
                                        cur, &toconv);
2930
0
                if (ret < 0) {
2931
0
                    if (written > 0)
2932
0
                        ret = -2;
2933
0
                    else
2934
0
                        return(-1);
2935
0
                }
2936
0
                unused += written;
2937
0
                cur += toconv;
2938
0
            } while (ret == -2);
2939
0
  }
2940
0
  if (in->buf->rawconsumed < unused)
2941
0
      return(-1);
2942
0
  return(in->buf->rawconsumed - unused);
2943
0
    }
2944
0
    return(in->consumed + (in->cur - in->base));
2945
0
}
2946
2947
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2948
#ifdef LIBXML_ISO8859X_ENABLED
2949
2950
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. Conversion stops without error as soon as @out is
 * full; the remaining input is simply left unconsumed.
 *
 * @xlattable starts with 48 block selectors (entries 0-31 are indexed by
 * the low 5 bits of a 2-byte lead byte, entries 32-47 by the low 4 bits
 * of a 3-byte lead byte) followed by 64-byte blocks indexed by the low
 * 6 bits of a trailing byte. A resulting value of 0 means "no mapping".
 *
 * Returns the number of bytes written to @out if success,
 *     -1 if a required argument is NULL,
 *     -2 if the transcoding fails (invalid UTF-8 or a character which
 *        is not part of the target character set), or
 *     -3 if the input ends in the middle of a multi-byte sequence.
 * The value of @inlen after return is the number of octets consumed,
 *     counting complete, successfully converted characters only.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              const unsigned char* const xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;
    int ret = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
        * initialization nothing to do
        */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    outend = out + (*outlen);
    inend = in + (*inlen);
    /*
     * Every character produces exactly one output byte, so a single
     * room check per iteration is enough to stay inside @out.
     */
    while ((in < inend) && (out < outend)) {
        unsigned char d = *in++;
        if  (d < 0x80)  {
            *out++ = d;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            break;
        } else if (d < 0xE0) {
            unsigned char c;
            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                ret = -3;
                break;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                ret = -2;
                break;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;
            if (!(in < inend - 1)) {
                /* trailing bytes not in input buffer */
                ret = -3;
                break;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                ret = -2;
                break;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                ret = -2;
                break;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                           xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            ret = -2;
            break;
        }
        /* only now is the whole character accounted as consumed */
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return((ret < 0) ? ret : *outlen);
}
3068
3069
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 * @unicodetable:  code points for the bytes 0x80..0xFF, indexed by
 *                 (byte - 0x80); an entry of 0 marks an undefined byte
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. Conversion stops early, without error, when @out
 * has no room left for the next character.
 *
 * Returns the number of bytes written to @out if success, or -1 if an
 *     argument is NULL or an undefined code point is met
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* instop;
    unsigned int c;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    instop = inend;

    /*
     * Main loop: requires room for a full 3-byte sequence. The test is
     * written as a difference so that no pointer before @out is formed
     * when *outlen is smaller than 2.
     */
    while ((in < inend) && (outend - out > 2)) {
        if (*in >= 0x80) {
            c = unicodetable [*in - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
            ++in;
        }
        /* never copy more ASCII bytes than @out can still hold */
        if (instop - in > outend - out) instop = in + (outend - out);
        /* bounds check first: @in may already sit at the end of input */
        while ((in < instop) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }
    /* Up to two trailing ASCII bytes still fit in the last two slots. */
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ =  *in++;
    }
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ =  *in++;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (*outlen);
}
3134
3135
3136
/************************************************************************
3137
 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
3138
 ************************************************************************/
3139
3140
/*
 * ISO-8859-2 lookup table: 128 16-bit entries.
 * NOTE(review): presumably passed as the unicodetable argument of
 * ISO8859xToUTF8, which indexes it with (byte - 0x80) and treats an
 * entry of 0 as an undefined code point - confirm against the caller.
 */
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
3141
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3142
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3143
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3144
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3145
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
3146
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
3147
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
3148
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
3149
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
3150
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
3151
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
3152
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
3153
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
3154
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
3155
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
3156
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
3157
};
3158
3159
/*
 * ISO-8859-2 transcoding table: 48 bytes followed by 6 blocks of 64 bytes.
 * NOTE(review): presumably passed as the xlattable argument of
 * UTF8ToISO8859x, where the first 48 bytes select a 64-byte block
 * (entries 0-31 by the low 5 bits of a 2-byte lead byte, entries 32-47
 * by the low 4 bits of a 3-byte lead byte), each block is indexed by the
 * low 6 bits of a trailing byte, and 0 means "no mapping" - confirm
 * against the caller.
 */
static const unsigned char xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
3160
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3161
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3162
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3163
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3164
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3165
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3166
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3167
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3168
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3169
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3170
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3171
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
3172
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
3173
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3174
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
3175
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3176
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
3177
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3178
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3179
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
3180
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
3181
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
3182
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
3183
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3184
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
3185
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
3186
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
3187
};
3188
3189
/*
 * ISO-8859-3 lookup table: 128 16-bit entries, some of them 0x0000.
 * NOTE(review): presumably passed as the unicodetable argument of
 * ISO8859xToUTF8, which indexes it with (byte - 0x80) and treats an
 * entry of 0 as an undefined code point - confirm against the caller.
 */
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
3190
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3191
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3192
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3193
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3194
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
3195
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
3196
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
3197
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
3198
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
3199
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3200
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
3201
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
3202
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
3203
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3204
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
3205
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
3206
};
3207
3208
/*
 * ISO-8859-3 transcoding table: 48 bytes followed by 7 blocks of 64 bytes.
 * NOTE(review): presumably passed as the xlattable argument of
 * UTF8ToISO8859x, where the first 48 bytes select a 64-byte block
 * (entries 0-31 by the low 5 bits of a 2-byte lead byte, entries 32-47
 * by the low 4 bits of a 3-byte lead byte), each block is indexed by the
 * low 6 bits of a trailing byte, and 0 means "no mapping" - confirm
 * against the caller.
 */
static const unsigned char xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
3209
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3210
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3211
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3212
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3213
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3214
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3215
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3216
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3217
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3218
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3219
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
3220
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
3221
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
3222
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
3223
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3224
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3225
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
3226
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3227
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3228
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3229
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3230
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3231
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3232
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3233
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
3234
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
3235
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
3236
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3237
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3238
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3239
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
3240
};
3241
3242
/*
 * ISO-8859-4 lookup table: 128 16-bit entries.
 * NOTE(review): presumably passed as the unicodetable argument of
 * ISO8859xToUTF8, which indexes it with (byte - 0x80) and treats an
 * entry of 0 as an undefined code point - confirm against the caller.
 */
static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
3243
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3244
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3245
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3246
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3247
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
3248
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
3249
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
3250
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
3251
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3252
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
3253
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3254
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
3255
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3256
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
3257
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3258
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
3259
};
3260
3261
/*
 * ISO-8859-4 transcoding table: 48 bytes followed by 6 blocks of 64 bytes.
 * NOTE(review): presumably passed as the xlattable argument of
 * UTF8ToISO8859x, where the first 48 bytes select a 64-byte block
 * (entries 0-31 by the low 5 bits of a 2-byte lead byte, entries 32-47
 * by the low 4 bits of a 3-byte lead byte), each block is indexed by the
 * low 6 bits of a trailing byte, and 0 means "no mapping" - confirm
 * against the caller.
 */
static const unsigned char xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
3262
    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
3263
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3264
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3265
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3266
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3267
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3268
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3269
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3270
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3271
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
3272
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3273
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3274
    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3275
    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
3276
    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
3277
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
3278
    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
3279
    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
3280
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
3281
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3282
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
3283
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3284
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3285
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3286
    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
3287
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
3288
    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
3289
};
3290
3291
/*
 * ISO-8859-5 lookup table: 128 16-bit entries.
 * NOTE(review): presumably passed as the unicodetable argument of
 * ISO8859xToUTF8, which indexes it with (byte - 0x80) and treats an
 * entry of 0 as an undefined code point - confirm against the caller.
 */
static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
3292
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3293
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3294
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3295
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3296
    0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
3297
    0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
3298
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
3299
    0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
3300
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
3301
    0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
3302
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
3303
    0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
3304
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
3305
    0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
3306
    0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
3307
    0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
3308
};
3309
3310
static const unsigned char xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
3311
    "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3312
    "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3313
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3314
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3315
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3316
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3317
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3318
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3319
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3320
    "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
3321
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3322
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
3323
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3324
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3325
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3326
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3327
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
3328
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3329
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3330
    "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3331
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3332
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3333
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3334
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3335
    "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3336
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3337
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3338
};
3339
3340
static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
3341
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3342
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3343
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3344
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3345
    0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
3346
    0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
3347
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3348
    0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
3349
    0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
3350
    0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3351
    0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3352
    0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3353
    0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3354
    0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3355
    0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3356
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3357
};
3358
3359
static const unsigned char xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
3360
    "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3361
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3362
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3363
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3364
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3365
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3366
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3367
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3368
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3369
    "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3370
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3371
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3372
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3373
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3374
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3375
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3376
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3377
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3378
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3379
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3380
    "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3381
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3382
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3383
};
3384
3385
static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
3386
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3387
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3388
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3389
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3390
    0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3391
    0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3392
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3393
    0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3394
    0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3395
    0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3396
    0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3397
    0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3398
    0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3399
    0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3400
    0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3401
    0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3402
};
3403
3404
static const unsigned char xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3405
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3406
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3407
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3408
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3409
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3410
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3411
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3412
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3413
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3414
    "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3415
    "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3416
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3417
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3418
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3419
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3420
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3421
    "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3422
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3423
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3424
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3425
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3426
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3427
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3428
    "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3429
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3430
    "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3431
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3432
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3433
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3434
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3435
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3436
};
3437
3438
static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
3439
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3440
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3441
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3442
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3443
    0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3444
    0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3445
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3446
    0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3447
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3448
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3449
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3450
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3451
    0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3452
    0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3453
    0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3454
    0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3455
};
3456
3457
static const unsigned char xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3458
    "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3459
    "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3460
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3461
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3462
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3463
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3464
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3465
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3466
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3467
    "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3468
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3469
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3470
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3471
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3472
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3473
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3474
    "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3475
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3476
    "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3477
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3478
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3479
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3480
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3481
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3482
    "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3483
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3484
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3485
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3486
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3487
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3488
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3489
};
3490
3491
static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
3492
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3493
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3494
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3495
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3496
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3497
    0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3498
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3499
    0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3500
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3501
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3502
    0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3503
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3504
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3505
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3506
    0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3507
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3508
};
3509
3510
static const unsigned char xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3511
    "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3512
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3513
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3514
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3515
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3516
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3517
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3518
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3519
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3520
    "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3521
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3522
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3523
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3524
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3525
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3526
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3527
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3528
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3529
    "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3530
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3531
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3532
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3533
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3534
};
3535
3536
static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3537
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3538
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3539
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3540
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3541
    0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3542
    0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3543
    0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3544
    0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3545
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3546
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3547
    0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3548
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3549
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3550
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3551
    0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3552
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3553
};
3554
3555
static const unsigned char xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3556
    "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3557
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3558
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3559
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3560
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3561
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3562
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3563
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3564
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3565
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3566
    "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3567
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3568
    "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3569
    "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3570
    "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3571
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3572
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3573
    "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3574
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3575
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3576
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3577
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3578
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3579
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3580
    "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3581
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3582
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3583
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3584
    "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3585
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3586
    "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3587
};
3588
3589
static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3590
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3591
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3592
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3593
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3594
    0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3595
    0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3596
    0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3597
    0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3598
    0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3599
    0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3600
    0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3601
    0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3602
    0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3603
    0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3604
    0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3605
    0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3606
};
3607
3608
static const unsigned char xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3609
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3610
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3611
    "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3612
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3613
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3614
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3615
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3616
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3617
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3618
    "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3619
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3620
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3621
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3622
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3623
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3624
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3625
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3626
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3627
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3628
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3629
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3630
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3631
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3632
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3633
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3634
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3635
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3636
};
3637
3638
static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3639
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3640
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3641
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3642
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3643
    0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3644
    0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3645
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3646
    0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3647
    0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3648
    0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3649
    0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3650
    0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3651
    0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3652
    0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3653
    0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3654
    0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3655
};
3656
3657
static const unsigned char xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3658
    "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3659
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3660
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3661
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3662
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3663
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3664
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3665
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3666
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3667
    "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3668
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3669
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3670
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3671
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3672
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3673
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3674
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3675
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3676
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3677
    "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3678
    "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3679
    "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3680
    "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3681
    "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3682
    "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3683
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3684
    "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3685
    "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3686
    "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3687
    "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3688
    "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3689
};
3690
3691
static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
3692
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3693
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3694
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3695
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3696
    0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3697
    0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3698
    0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3699
    0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3700
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3701
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3702
    0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3703
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3704
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3705
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3706
    0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3707
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
3708
};
3709
3710
static const unsigned char xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
3711
    "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3712
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3713
    "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3714
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3715
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3716
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3717
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3718
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3719
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3720
    "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3721
    "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3722
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3723
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3724
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3725
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3726
    "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3727
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3728
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3729
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3730
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3731
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3732
    "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3733
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3734
    "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3735
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3736
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3737
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3738
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3739
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3740
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3741
    "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3742
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3743
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3744
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3745
    "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3746
    "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3747
    "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3748
    "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3749
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3750
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3751
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3752
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3753
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3754
};
3755
3756
static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
3757
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3758
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3759
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3760
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3761
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3762
    0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3763
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3764
    0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3765
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3766
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3767
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3768
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3769
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3770
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3771
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3772
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
3773
};
3774
3775
/*
 * Byte table handed to UTF8ToISO8859x by UTF8ToISO8859_15.
 * Declared size is 48 + 6 * 64 = 432 bytes; the initializer consists of
 * 27 string literals of 16 bytes each, which fills the array exactly, so
 * no terminating NUL is stored.
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * six 64-byte blocks, with 0x00 meaning "no mapping"; the actual lookup
 * lives in UTF8ToISO8859x -- confirm there before editing any byte.
 */
static const unsigned char xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
3776
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3777
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3778
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3779
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3780
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3781
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3782
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3783
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3784
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3785
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3786
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3787
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3788
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3789
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3790
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3791
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3792
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3793
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3794
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3795
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3796
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3797
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3798
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3799
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3800
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3801
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3802
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
3803
};
3804
3805
/*
 * Table of 128 16-bit values handed to ISO8859xToUTF8 by ISO8859_16ToUTF8.
 * The first 32 entries are 0x0080..0x009f; the remaining 96 mix values
 * equal to 0x0080 + index with values from the 0x01xx, 0x02xx and 0x20xx
 * ranges (for example 0x20ac, 0x0218, 0x021b).
 * NOTE(review): presumably entry i is the Unicode code point for input
 * byte 0x80 + i -- confirm against the lookup in ISO8859xToUTF8.
 */
static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
3806
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3807
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3808
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3809
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3810
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3811
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3812
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3813
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3814
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3815
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3816
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3817
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3818
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3819
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3820
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3821
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
3822
};
3823
3824
/*
 * Byte table handed to UTF8ToISO8859x by UTF8ToISO8859_16.
 * Declared size is 48 + 9 * 64 = 624 bytes; the initializer consists of
 * 39 string literals of 16 bytes each, which fills the array exactly, so
 * no terminating NUL is stored.
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * nine 64-byte blocks, with 0x00 meaning "no mapping"; the actual lookup
 * lives in UTF8ToISO8859x -- confirm there before editing any byte.
 */
static const unsigned char xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
3825
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3826
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3827
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3828
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3829
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3830
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3831
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3832
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3833
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3834
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3835
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3836
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3837
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3838
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3839
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3840
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3841
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3842
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3843
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3844
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3845
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3846
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3847
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3848
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3849
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3850
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3851
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3852
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3853
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3854
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3855
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3856
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3857
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3858
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3859
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3860
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3861
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3862
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3863
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3864
};
3865
3866
3867
/*
3868
 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3869
 */
3870
3871
/*
 * Binds the generic ISO8859xToUTF8 helper to xmlunicodetable_ISO8859_2.
 * out, outlen, in and inlen are forwarded unchanged and the helper's
 * return value is returned as-is; see ISO8859xToUTF8 for their meaning.
 */
static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3872
    const unsigned char* in, int *inlen) {
3873
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3874
}
3875
/*
 * Binds the generic UTF8ToISO8859x helper to xmltranscodetable_ISO8859_2.
 * out, outlen, in and inlen are forwarded unchanged and the helper's
 * return value is returned as-is; see UTF8ToISO8859x for their meaning.
 */
static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3876
    const unsigned char* in, int *inlen) {
3877
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3878
}
3879
3880
/*
 * Binds the generic ISO8859xToUTF8 helper to xmlunicodetable_ISO8859_3.
 * out, outlen, in and inlen are forwarded unchanged and the helper's
 * return value is returned as-is; see ISO8859xToUTF8 for their meaning.
 */
static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3881
    const unsigned char* in, int *inlen) {
3882
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3883
}
3884
/*
 * Binds the generic UTF8ToISO8859x helper to xmltranscodetable_ISO8859_3.
 * out, outlen, in and inlen are forwarded unchanged and the helper's
 * return value is returned as-is; see UTF8ToISO8859x for their meaning.
 */
static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3885
    const unsigned char* in, int *inlen) {
3886
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3887
}
3888
3889
/*
 * Binds the generic ISO8859xToUTF8 helper to xmlunicodetable_ISO8859_4.
 * out, outlen, in and inlen are forwarded unchanged and the helper's
 * return value is returned as-is; see ISO8859xToUTF8 for their meaning.
 */
static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3890
    const unsigned char* in, int *inlen) {
3891
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3892
}
3893
/*
 * Binds the generic UTF8ToISO8859x helper to xmltranscodetable_ISO8859_4.
 * out, outlen, in and inlen are forwarded unchanged and the helper's
 * return value is returned as-is; see UTF8ToISO8859x for their meaning.
 */
static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3894
    const unsigned char* in, int *inlen) {
3895
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3896
}
3897
3898
/*
 * Binds the generic ISO8859xToUTF8 helper to xmlunicodetable_ISO8859_5.
 * out, outlen, in and inlen are forwarded unchanged and the helper's
 * return value is returned as-is; see ISO8859xToUTF8 for their meaning.
 */
static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3899
    const unsigned char* in, int *inlen) {
3900
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3901
}
3902
/*
 * Binds the generic UTF8ToISO8859x helper to xmltranscodetable_ISO8859_5.
 * out, outlen, in and inlen are forwarded unchanged and the helper's
 * return value is returned as-is; see UTF8ToISO8859x for their meaning.
 */
static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3903
    const unsigned char* in, int *inlen) {
3904
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3905
}
3906
3907
/*
 * Binds the generic ISO8859xToUTF8 helper to xmlunicodetable_ISO8859_6.
 * out, outlen, in and inlen are forwarded unchanged and the helper's
 * return value is returned as-is; see ISO8859xToUTF8 for their meaning.
 */
static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3908
    const unsigned char* in, int *inlen) {
3909
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3910
}
3911
/*
 * Binds the generic UTF8ToISO8859x helper to xmltranscodetable_ISO8859_6.
 * out, outlen, in and inlen are forwarded unchanged and the helper's
 * return value is returned as-is; see UTF8ToISO8859x for their meaning.
 */
static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3912
    const unsigned char* in, int *inlen) {
3913
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3914
}
3915
3916
/*
 * Binds the generic ISO8859xToUTF8 helper to xmlunicodetable_ISO8859_7.
 * out, outlen, in and inlen are forwarded unchanged and the helper's
 * return value is returned as-is; see ISO8859xToUTF8 for their meaning.
 */
static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3917
    const unsigned char* in, int *inlen) {
3918
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3919
}
3920
/*
 * Binds the generic UTF8ToISO8859x helper to xmltranscodetable_ISO8859_7.
 * out, outlen, in and inlen are forwarded unchanged and the helper's
 * return value is returned as-is; see UTF8ToISO8859x for their meaning.
 */
static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3921
    const unsigned char* in, int *inlen) {
3922
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3923
}
3924
3925
/*
 * Binds the generic ISO8859xToUTF8 helper to xmlunicodetable_ISO8859_8.
 * out, outlen, in and inlen are forwarded unchanged and the helper's
 * return value is returned as-is; see ISO8859xToUTF8 for their meaning.
 */
static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3926
    const unsigned char* in, int *inlen) {
3927
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3928
}
3929
/*
 * Binds the generic UTF8ToISO8859x helper to xmltranscodetable_ISO8859_8.
 * out, outlen, in and inlen are forwarded unchanged and the helper's
 * return value is returned as-is; see UTF8ToISO8859x for their meaning.
 */
static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3930
    const unsigned char* in, int *inlen) {
3931
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3932
}
3933
3934
/*
 * Binds the generic ISO8859xToUTF8 helper to xmlunicodetable_ISO8859_9.
 * out, outlen, in and inlen are forwarded unchanged and the helper's
 * return value is returned as-is; see ISO8859xToUTF8 for their meaning.
 */
static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3935
    const unsigned char* in, int *inlen) {
3936
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3937
}
3938
/*
 * Binds the generic UTF8ToISO8859x helper to xmltranscodetable_ISO8859_9.
 * out, outlen, in and inlen are forwarded unchanged and the helper's
 * return value is returned as-is; see UTF8ToISO8859x for their meaning.
 */
static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3939
    const unsigned char* in, int *inlen) {
3940
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3941
}
3942
3943
/*
 * Binds the generic ISO8859xToUTF8 helper to xmlunicodetable_ISO8859_10.
 * out, outlen, in and inlen are forwarded unchanged and the helper's
 * return value is returned as-is; see ISO8859xToUTF8 for their meaning.
 */
static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3944
    const unsigned char* in, int *inlen) {
3945
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3946
}
3947
/*
 * Binds the generic UTF8ToISO8859x helper to xmltranscodetable_ISO8859_10.
 * out, outlen, in and inlen are forwarded unchanged and the helper's
 * return value is returned as-is; see UTF8ToISO8859x for their meaning.
 */
static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3948
    const unsigned char* in, int *inlen) {
3949
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3950
}
3951
3952
/*
 * Binds the generic ISO8859xToUTF8 helper to xmlunicodetable_ISO8859_11.
 * out, outlen, in and inlen are forwarded unchanged and the helper's
 * return value is returned as-is; see ISO8859xToUTF8 for their meaning.
 */
static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3953
    const unsigned char* in, int *inlen) {
3954
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3955
}
3956
/*
 * Binds the generic UTF8ToISO8859x helper to xmltranscodetable_ISO8859_11.
 * out, outlen, in and inlen are forwarded unchanged and the helper's
 * return value is returned as-is; see UTF8ToISO8859x for their meaning.
 */
static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3957
    const unsigned char* in, int *inlen) {
3958
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3959
}
3960
3961
/*
 * Binds the generic ISO8859xToUTF8 helper to xmlunicodetable_ISO8859_13.
 * out, outlen, in and inlen are forwarded unchanged and the helper's
 * return value is returned as-is; see ISO8859xToUTF8 for their meaning.
 */
static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3962
    const unsigned char* in, int *inlen) {
3963
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3964
}
3965
/*
 * Binds the generic UTF8ToISO8859x helper to xmltranscodetable_ISO8859_13.
 * out, outlen, in and inlen are forwarded unchanged and the helper's
 * return value is returned as-is; see UTF8ToISO8859x for their meaning.
 */
static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3966
    const unsigned char* in, int *inlen) {
3967
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3968
}
3969
3970
/*
 * Binds the generic ISO8859xToUTF8 helper to xmlunicodetable_ISO8859_14.
 * out, outlen, in and inlen are forwarded unchanged and the helper's
 * return value is returned as-is; see ISO8859xToUTF8 for their meaning.
 */
static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3971
    const unsigned char* in, int *inlen) {
3972
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3973
}
3974
/*
 * Binds the generic UTF8ToISO8859x helper to xmltranscodetable_ISO8859_14.
 * out, outlen, in and inlen are forwarded unchanged and the helper's
 * return value is returned as-is; see UTF8ToISO8859x for their meaning.
 */
static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3975
    const unsigned char* in, int *inlen) {
3976
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3977
}
3978
3979
/*
 * Binds the generic ISO8859xToUTF8 helper to xmlunicodetable_ISO8859_15.
 * out, outlen, in and inlen are forwarded unchanged and the helper's
 * return value is returned as-is; see ISO8859xToUTF8 for their meaning.
 */
static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3980
    const unsigned char* in, int *inlen) {
3981
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3982
}
3983
/*
 * Binds the generic UTF8ToISO8859x helper to xmltranscodetable_ISO8859_15.
 * out, outlen, in and inlen are forwarded unchanged and the helper's
 * return value is returned as-is; see UTF8ToISO8859x for their meaning.
 */
static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3984
    const unsigned char* in, int *inlen) {
3985
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3986
}
3987
3988
/*
 * Binds the generic ISO8859xToUTF8 helper to xmlunicodetable_ISO8859_16.
 * out, outlen, in and inlen are forwarded unchanged and the helper's
 * return value is returned as-is; see ISO8859xToUTF8 for their meaning.
 */
static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3989
    const unsigned char* in, int *inlen) {
3990
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3991
}
3992
/*
 * Binds the generic UTF8ToISO8859x helper to xmltranscodetable_ISO8859_16.
 * out, outlen, in and inlen are forwarded unchanged and the helper's
 * return value is returned as-is; see UTF8ToISO8859x for their meaning.
 */
static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3993
    const unsigned char* in, int *inlen) {
3994
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3995
}
3996
3997
#endif
3998
#endif
3999