Coverage Report

Created: 2025-03-12 04:16

/src/libxml2/encoding.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * encoding.c : implements the encoding conversion functions needed for XML
3
 *
4
 * Related specs:
5
 * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6
 * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7
 * [ISO-10646]    UTF-8 and UTF-16 in Annexes
8
 * [ISO-8859-1]   ISO Latin-1 characters codes.
9
 * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
10
 *                Worldwide Character Encoding -- Version 1.0", Addison-
11
 *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
12
 *                described in Unicode Technical Report #4.
13
 * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
14
 *                Information Interchange, ANSI X3.4-1986.
15
 *
16
 * See Copyright for the status of this software.
17
 *
18
 * daniel@veillard.com
19
 *
20
 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
21
 */
22
23
#define IN_LIBXML
24
#include "libxml.h"
25
26
#include <string.h>
27
#include <limits.h>
28
#include <ctype.h>
29
#include <stdlib.h>
30
31
#ifdef LIBXML_ICONV_ENABLED
32
#include <errno.h>
33
#endif
34
35
#include <libxml/encoding.h>
36
#include <libxml/xmlmemory.h>
37
#ifdef LIBXML_HTML_ENABLED
38
#include <libxml/HTMLparser.h>
39
#endif
40
#include <libxml/globals.h>
41
#include <libxml/xmlerror.h>
42
43
#include "private/buf.h"
44
#include "private/enc.h"
45
#include "private/error.h"
46
47
#ifdef LIBXML_ICU_ENABLED
48
#include <unicode/ucnv.h>
49
/* Size of pivot buffer, same as icu/source/common/ucnv.cpp CHUNK_SIZE */
50
#define ICU_PIVOT_BUF_SIZE 1024
51
typedef struct _uconv_t uconv_t;
52
struct _uconv_t {
53
  UConverter *uconv; /* for conversion between an encoding and UTF-16 */
54
  UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */
55
  UChar      pivot_buf[ICU_PIVOT_BUF_SIZE];
56
  UChar      *pivot_source;
57
  UChar      *pivot_target;
58
};
59
#endif
60
61
typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
62
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
63
struct _xmlCharEncodingAlias {
64
    const char *name;
65
    const char *alias;
66
};
67
68
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
69
static int xmlCharEncodingAliasesNb = 0;
70
static int xmlCharEncodingAliasesMax = 0;
71
72
#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
73
#if 0
74
#define DEBUG_ENCODING  /* Define this to get encoding traces */
75
#endif
76
#else
77
#endif
78
79
static int xmlLittleEndian = 1;
80
81
/**
82
 * xmlEncodingErrMemory:
83
 * @extra:  extra information
84
 *
85
 * Handle an out of memory condition
86
 */
87
static void
88
xmlEncodingErrMemory(const char *extra)
89
0
{
90
0
    __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
91
0
}
92
93
/**
94
 * xmlErrEncoding:
95
 * @error:  the error number
96
 * @msg:  the error message
97
 *
98
 * n encoding error
99
 */
100
static void LIBXML_ATTR_FORMAT(2,0)
101
xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
102
1.00k
{
103
1.00k
    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
104
1.00k
                    XML_FROM_I18N, error, XML_ERR_FATAL,
105
1.00k
                    NULL, 0, val, NULL, NULL, 0, 0, msg, val);
106
1.00k
}
107
108
#ifdef LIBXML_ICU_ENABLED
109
static uconv_t*
110
openIcuConverter(const char* name, int toUnicode)
111
{
112
  UErrorCode status = U_ZERO_ERROR;
113
  uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
114
  if (conv == NULL)
115
    return NULL;
116
117
  conv->pivot_source = conv->pivot_buf;
118
  conv->pivot_target = conv->pivot_buf;
119
120
  conv->uconv = ucnv_open(name, &status);
121
  if (U_FAILURE(status))
122
    goto error;
123
124
  status = U_ZERO_ERROR;
125
  if (toUnicode) {
126
    ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
127
                        NULL, NULL, NULL, &status);
128
  }
129
  else {
130
    ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
131
                        NULL, NULL, NULL, &status);
132
  }
133
  if (U_FAILURE(status))
134
    goto error;
135
136
  status = U_ZERO_ERROR;
137
  conv->utf8 = ucnv_open("UTF-8", &status);
138
  if (U_SUCCESS(status))
139
    return conv;
140
141
error:
142
  if (conv->uconv)
143
    ucnv_close(conv->uconv);
144
  xmlFree(conv);
145
  return NULL;
146
}
147
148
static void
149
closeIcuConverter(uconv_t *conv)
150
{
151
  if (conv != NULL) {
152
    ucnv_close(conv->uconv);
153
    ucnv_close(conv->utf8);
154
    xmlFree(conv);
155
  }
156
}
157
#endif /* LIBXML_ICU_ENABLED */
158
159
/************************************************************************
160
 *                  *
161
 *    Conversions To/From UTF8 encoding     *
162
 *                  *
163
 ************************************************************************/
164
165
/**
166
 * asciiToUTF8:
167
 * @out:  a pointer to an array of bytes to store the result
168
 * @outlen:  the length of @out
169
 * @in:  a pointer to an array of ASCII chars
170
 * @inlen:  the length of @in
171
 *
172
 * Take a block of ASCII chars in and try to convert it to an UTF-8
173
 * block of chars out.
174
 * Returns 0 if success, or -1 otherwise
175
 * The value of @inlen after return is the number of octets consumed
176
 *     if the return value is positive, else unpredictable.
177
 * The value of @outlen after return is the number of octets produced.
178
 */
179
static int
180
asciiToUTF8(unsigned char* out, int *outlen,
181
21.0k
              const unsigned char* in, int *inlen) {
182
21.0k
    unsigned char* outstart = out;
183
21.0k
    const unsigned char* base = in;
184
21.0k
    const unsigned char* processed = in;
185
21.0k
    unsigned char* outend = out + *outlen;
186
21.0k
    const unsigned char* inend;
187
21.0k
    unsigned int c;
188
189
21.0k
    inend = in + (*inlen);
190
837k
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
191
834k
  c= *in++;
192
193
834k
        if (out >= outend)
194
0
      break;
195
834k
        if (c < 0x80) {
196
816k
      *out++ = c;
197
816k
  } else {
198
17.9k
      *outlen = out - outstart;
199
17.9k
      *inlen = processed - base;
200
17.9k
      return(-1);
201
17.9k
  }
202
203
816k
  processed = (const unsigned char*) in;
204
816k
    }
205
3.07k
    *outlen = out - outstart;
206
3.07k
    *inlen = processed - base;
207
3.07k
    return(*outlen);
208
21.0k
}
209
210
#ifdef LIBXML_OUTPUT_ENABLED
211
/**
212
 * UTF8Toascii:
213
 * @out:  a pointer to an array of bytes to store the result
214
 * @outlen:  the length of @out
215
 * @in:  a pointer to an array of UTF-8 chars
216
 * @inlen:  the length of @in
217
 *
218
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
219
 * block of chars out.
220
 *
221
 * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
222
 * The value of @inlen after return is the number of octets consumed
223
 *     if the return value is positive, else unpredictable.
224
 * The value of @outlen after return is the number of octets produced.
225
 */
226
static int
227
UTF8Toascii(unsigned char* out, int *outlen,
228
433
              const unsigned char* in, int *inlen) {
229
433
    const unsigned char* processed = in;
230
433
    const unsigned char* outend;
231
433
    const unsigned char* outstart = out;
232
433
    const unsigned char* instart = in;
233
433
    const unsigned char* inend;
234
433
    unsigned int c, d;
235
433
    int trailing;
236
237
433
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
238
433
    if (in == NULL) {
239
        /*
240
   * initialization nothing to do
241
   */
242
107
  *outlen = 0;
243
107
  *inlen = 0;
244
107
  return(0);
245
107
    }
246
326
    inend = in + (*inlen);
247
326
    outend = out + (*outlen);
248
69.7k
    while (in < inend) {
249
69.4k
  d = *in++;
250
69.4k
  if      (d < 0x80)  { c= d; trailing= 0; }
251
104
  else if (d < 0xC0) {
252
      /* trailing byte in leading position */
253
0
      *outlen = out - outstart;
254
0
      *inlen = processed - instart;
255
0
      return(-2);
256
104
        } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
257
10
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
258
1
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
259
0
  else {
260
      /* no chance for this in Ascii */
261
0
      *outlen = out - outstart;
262
0
      *inlen = processed - instart;
263
0
      return(-2);
264
0
  }
265
266
69.4k
  if (inend - in < trailing) {
267
0
      break;
268
0
  }
269
270
69.6k
  for ( ; trailing; trailing--) {
271
115
      if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
272
0
    break;
273
115
      c <<= 6;
274
115
      c |= d & 0x3F;
275
115
  }
276
277
  /* assertion: c is a single UTF-4 value */
278
69.4k
  if (c < 0x80) {
279
69.3k
      if (out >= outend)
280
0
    break;
281
69.3k
      *out++ = c;
282
69.3k
  } else {
283
      /* no chance for this in Ascii */
284
104
      *outlen = out - outstart;
285
104
      *inlen = processed - instart;
286
104
      return(-2);
287
104
  }
288
69.3k
  processed = in;
289
69.3k
    }
290
222
    *outlen = out - outstart;
291
222
    *inlen = processed - instart;
292
222
    return(*outlen);
293
326
}
294
#endif /* LIBXML_OUTPUT_ENABLED */
295
296
/**
297
 * isolat1ToUTF8:
298
 * @out:  a pointer to an array of bytes to store the result
299
 * @outlen:  the length of @out
300
 * @in:  a pointer to an array of ISO Latin 1 chars
301
 * @inlen:  the length of @in
302
 *
303
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
304
 * block of chars out.
305
 * Returns the number of bytes written if success, or -1 otherwise
306
 * The value of @inlen after return is the number of octets consumed
307
 *     if the return value is positive, else unpredictable.
308
 * The value of @outlen after return is the number of octets produced.
309
 */
310
int
311
isolat1ToUTF8(unsigned char* out, int *outlen,
312
123k
              const unsigned char* in, int *inlen) {
313
123k
    unsigned char* outstart = out;
314
123k
    const unsigned char* base = in;
315
123k
    unsigned char* outend;
316
123k
    const unsigned char* inend;
317
123k
    const unsigned char* instop;
318
319
123k
    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
320
0
  return(-1);
321
322
123k
    outend = out + *outlen;
323
123k
    inend = in + (*inlen);
324
123k
    instop = inend;
325
326
389k
    while ((in < inend) && (out < outend - 1)) {
327
265k
  if (*in >= 0x80) {
328
143k
      *out++ = (((*in) >>  6) & 0x1F) | 0xC0;
329
143k
            *out++ = ((*in) & 0x3F) | 0x80;
330
143k
      ++in;
331
143k
  }
332
265k
  if ((instop - in) > (outend - out)) instop = in + (outend - out);
333
42.4M
  while ((in < instop) && (*in < 0x80)) {
334
42.2M
      *out++ = *in++;
335
42.2M
  }
336
265k
    }
337
123k
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
338
0
        *out++ = *in++;
339
0
    }
340
123k
    *outlen = out - outstart;
341
123k
    *inlen = in - base;
342
123k
    return(*outlen);
343
123k
}
344
345
/**
346
 * UTF8ToUTF8:
347
 * @out:  a pointer to an array of bytes to store the result
348
 * @outlen:  the length of @out
349
 * @inb:  a pointer to an array of UTF-8 chars
350
 * @inlenb:  the length of @in in UTF-8 chars
351
 *
352
 * No op copy operation for UTF8 handling.
353
 *
354
 * Returns the number of bytes written, or -1 if lack of space.
355
 *     The value of *inlen after return is the number of octets consumed
356
 *     if the return value is positive, else unpredictable.
357
 */
358
static int
359
UTF8ToUTF8(unsigned char* out, int *outlen,
360
           const unsigned char* inb, int *inlenb)
361
2.71k
{
362
2.71k
    int len;
363
364
2.71k
    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
365
0
  return(-1);
366
2.71k
    if (inb == NULL) {
367
        /* inb == NULL means output is initialized. */
368
619
        *outlen = 0;
369
619
        *inlenb = 0;
370
619
        return(0);
371
619
    }
372
2.09k
    if (*outlen > *inlenb) {
373
2.09k
  len = *inlenb;
374
2.09k
    } else {
375
0
  len = *outlen;
376
0
    }
377
2.09k
    if (len < 0)
378
0
  return(-1);
379
380
    /*
381
     * FIXME: Conversion functions must assure valid UTF-8, so we have
382
     * to check for UTF-8 validity. Preferably, this converter shouldn't
383
     * be used at all.
384
     */
385
2.09k
    memcpy(out, inb, len);
386
387
2.09k
    *outlen = len;
388
2.09k
    *inlenb = len;
389
2.09k
    return(*outlen);
390
2.09k
}
391
392
393
#ifdef LIBXML_OUTPUT_ENABLED
394
/**
395
 * UTF8Toisolat1:
396
 * @out:  a pointer to an array of bytes to store the result
397
 * @outlen:  the length of @out
398
 * @in:  a pointer to an array of UTF-8 chars
399
 * @inlen:  the length of @in
400
 *
401
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
402
 * block of chars out.
403
 *
404
 * Returns the number of bytes written if success, -2 if the transcoding fails,
405
           or -1 otherwise
406
 * The value of @inlen after return is the number of octets consumed
407
 *     if the return value is positive, else unpredictable.
408
 * The value of @outlen after return is the number of octets produced.
409
 */
410
int
411
UTF8Toisolat1(unsigned char* out, int *outlen,
412
2.78k
              const unsigned char* in, int *inlen) {
413
2.78k
    const unsigned char* processed = in;
414
2.78k
    const unsigned char* outend;
415
2.78k
    const unsigned char* outstart = out;
416
2.78k
    const unsigned char* instart = in;
417
2.78k
    const unsigned char* inend;
418
2.78k
    unsigned int c, d;
419
2.78k
    int trailing;
420
421
2.78k
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
422
2.78k
    if (in == NULL) {
423
        /*
424
   * initialization nothing to do
425
   */
426
445
  *outlen = 0;
427
445
  *inlen = 0;
428
445
  return(0);
429
445
    }
430
2.34k
    inend = in + (*inlen);
431
2.34k
    outend = out + (*outlen);
432
8.20M
    while (in < inend) {
433
8.20M
  d = *in++;
434
8.20M
  if      (d < 0x80)  { c= d; trailing= 0; }
435
16.9k
  else if (d < 0xC0) {
436
      /* trailing byte in leading position */
437
65
      *outlen = out - outstart;
438
65
      *inlen = processed - instart;
439
65
      return(-2);
440
16.8k
        } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
441
76
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
442
45
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
443
18
  else {
444
      /* no chance for this in IsoLat1 */
445
18
      *outlen = out - outstart;
446
18
      *inlen = processed - instart;
447
18
      return(-2);
448
18
  }
449
450
8.20M
  if (inend - in < trailing) {
451
6
      break;
452
6
  }
453
454
8.22M
  for ( ; trailing; trailing--) {
455
16.8k
      if (in >= inend)
456
0
    break;
457
16.8k
      if (((d= *in++) & 0xC0) != 0x80) {
458
46
    *outlen = out - outstart;
459
46
    *inlen = processed - instart;
460
46
    return(-2);
461
46
      }
462
16.8k
      c <<= 6;
463
16.8k
      c |= d & 0x3F;
464
16.8k
  }
465
466
  /* assertion: c is a single UTF-4 value */
467
8.20M
  if (c <= 0xFF) {
468
8.20M
      if (out >= outend)
469
0
    break;
470
8.20M
      *out++ = c;
471
8.20M
  } else {
472
      /* no chance for this in IsoLat1 */
473
37
      *outlen = out - outstart;
474
37
      *inlen = processed - instart;
475
37
      return(-2);
476
37
  }
477
8.20M
  processed = in;
478
8.20M
    }
479
2.17k
    *outlen = out - outstart;
480
2.17k
    *inlen = processed - instart;
481
2.17k
    return(*outlen);
482
2.34k
}
483
#endif /* LIBXML_OUTPUT_ENABLED */
484
485
/**
486
 * UTF16LEToUTF8:
487
 * @out:  a pointer to an array of bytes to store the result
488
 * @outlen:  the length of @out
489
 * @inb:  a pointer to an array of UTF-16LE passwd as a byte array
490
 * @inlenb:  the length of @in in UTF-16LE chars
491
 *
492
 * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8
493
 * block of chars out. This function assumes the endian property
494
 * is the same between the native type of this machine and the
495
 * inputed one.
496
 *
497
 * Returns the number of bytes written, or -1 if lack of space, or -2
498
 *     if the transcoding fails (if *in is not a valid utf16 string)
499
 *     The value of *inlen after return is the number of octets consumed
500
 *     if the return value is positive, else unpredictable.
501
 */
502
static int
503
UTF16LEToUTF8(unsigned char* out, int *outlen,
504
            const unsigned char* inb, int *inlenb)
505
8.60k
{
506
8.60k
    unsigned char* outstart = out;
507
8.60k
    const unsigned char* processed = inb;
508
8.60k
    unsigned char* outend;
509
8.60k
    unsigned short* in = (unsigned short*) inb;
510
8.60k
    unsigned short* inend;
511
8.60k
    unsigned int c, d, inlen;
512
8.60k
    unsigned char *tmp;
513
8.60k
    int bits;
514
515
8.60k
    if (*outlen == 0) {
516
0
        *inlenb = 0;
517
0
        return(0);
518
0
    }
519
8.60k
    outend = out + *outlen;
520
8.60k
    if ((*inlenb % 2) == 1)
521
3.04k
        (*inlenb)--;
522
8.60k
    inlen = *inlenb / 2;
523
8.60k
    inend = in + inlen;
524
571k
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
525
563k
        if (xmlLittleEndian) {
526
563k
      c= *in++;
527
563k
  } else {
528
0
      tmp = (unsigned char *) in;
529
0
      c = *tmp++;
530
0
      c = c | (*tmp << 8);
531
0
      in++;
532
0
  }
533
563k
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
534
553
      if (in >= inend) {           /* handle split mutli-byte characters */
535
305
    break;
536
305
      }
537
248
      if (xmlLittleEndian) {
538
248
    d = *in++;
539
248
      } else {
540
0
    tmp = (unsigned char *) in;
541
0
    d = *tmp++;
542
0
    d = d | (*tmp << 8);
543
0
    in++;
544
0
      }
545
248
            if ((d & 0xFC00) == 0xDC00) {
546
88
                c &= 0x03FF;
547
88
                c <<= 10;
548
88
                c |= d & 0x03FF;
549
88
                c += 0x10000;
550
88
            }
551
160
            else {
552
160
    *outlen = out - outstart;
553
160
    *inlenb = processed - inb;
554
160
          return(-2);
555
160
      }
556
248
        }
557
558
  /* assertion: c is a single UTF-4 value */
559
562k
        if (out >= outend)
560
0
      break;
561
562k
        if      (c <    0x80) {  *out++=  c;                bits= -6; }
562
560k
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
563
559k
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
564
88
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
565
566
1.68M
        for ( ; bits >= 0; bits-= 6) {
567
1.11M
            if (out >= outend)
568
0
          break;
569
1.11M
            *out++= ((c >> bits) & 0x3F) | 0x80;
570
1.11M
        }
571
562k
  processed = (const unsigned char*) in;
572
562k
    }
573
8.44k
    *outlen = out - outstart;
574
8.44k
    *inlenb = processed - inb;
575
8.44k
    return(*outlen);
576
8.60k
}
577
578
#ifdef LIBXML_OUTPUT_ENABLED
579
/**
580
 * UTF8ToUTF16LE:
581
 * @outb:  a pointer to an array of bytes to store the result
582
 * @outlen:  the length of @outb
583
 * @in:  a pointer to an array of UTF-8 chars
584
 * @inlen:  the length of @in
585
 *
586
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
587
 * block of chars out.
588
 *
589
 * Returns the number of bytes written, or -1 if lack of space, or -2
590
 *     if the transcoding failed.
591
 */
592
static int
593
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
594
            const unsigned char* in, int *inlen)
595
587
{
596
587
    unsigned short* out = (unsigned short*) outb;
597
587
    const unsigned char* processed = in;
598
587
    const unsigned char *const instart = in;
599
587
    unsigned short* outstart= out;
600
587
    unsigned short* outend;
601
587
    const unsigned char* inend;
602
587
    unsigned int c, d;
603
587
    int trailing;
604
587
    unsigned char *tmp;
605
587
    unsigned short tmp1, tmp2;
606
607
    /* UTF16LE encoding has no BOM */
608
587
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
609
587
    if (in == NULL) {
610
11
  *outlen = 0;
611
11
  *inlen = 0;
612
11
  return(0);
613
11
    }
614
576
    inend= in + *inlen;
615
576
    outend = out + (*outlen / 2);
616
36.4k
    while (in < inend) {
617
36.1k
      d= *in++;
618
36.1k
      if      (d < 0x80)  { c= d; trailing= 0; }
619
1.64k
      else if (d < 0xC0) {
620
          /* trailing byte in leading position */
621
296
    *outlen = (out - outstart) * 2;
622
296
    *inlen = processed - instart;
623
296
    return(-2);
624
1.34k
      } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
625
985
      else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
626
572
      else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
627
14
      else {
628
  /* no chance for this in UTF-16 */
629
14
  *outlen = (out - outstart) * 2;
630
14
  *inlen = processed - instart;
631
14
  return(-2);
632
14
      }
633
634
35.8k
      if (inend - in < trailing) {
635
0
          break;
636
0
      }
637
638
37.7k
      for ( ; trailing; trailing--) {
639
2.39k
          if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
640
511
        break;
641
1.88k
          c <<= 6;
642
1.88k
          c |= d & 0x3F;
643
1.88k
      }
644
645
      /* assertion: c is a single UTF-4 value */
646
35.8k
        if (c < 0x10000) {
647
35.5k
            if (out >= outend)
648
0
          break;
649
35.5k
      if (xmlLittleEndian) {
650
35.5k
    *out++ = c;
651
35.5k
      } else {
652
0
    tmp = (unsigned char *) out;
653
0
    *tmp = (unsigned char) c; /* Explicit truncation */
654
0
    *(tmp + 1) = c >> 8 ;
655
0
    out++;
656
0
      }
657
35.5k
        }
658
349
        else if (c < 0x110000) {
659
345
            if (out+1 >= outend)
660
0
          break;
661
345
            c -= 0x10000;
662
345
      if (xmlLittleEndian) {
663
345
    *out++ = 0xD800 | (c >> 10);
664
345
    *out++ = 0xDC00 | (c & 0x03FF);
665
345
      } else {
666
0
    tmp1 = 0xD800 | (c >> 10);
667
0
    tmp = (unsigned char *) out;
668
0
    *tmp = (unsigned char) tmp1; /* Explicit truncation */
669
0
    *(tmp + 1) = tmp1 >> 8;
670
0
    out++;
671
672
0
    tmp2 = 0xDC00 | (c & 0x03FF);
673
0
    tmp = (unsigned char *) out;
674
0
    *tmp  = (unsigned char) tmp2; /* Explicit truncation */
675
0
    *(tmp + 1) = tmp2 >> 8;
676
0
    out++;
677
0
      }
678
345
        }
679
4
        else
680
4
      break;
681
35.8k
  processed = in;
682
35.8k
    }
683
266
    *outlen = (out - outstart) * 2;
684
266
    *inlen = processed - instart;
685
266
    return(*outlen);
686
576
}
687
688
/**
689
 * UTF8ToUTF16:
690
 * @outb:  a pointer to an array of bytes to store the result
691
 * @outlen:  the length of @outb
692
 * @in:  a pointer to an array of UTF-8 chars
693
 * @inlen:  the length of @in
694
 *
695
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
696
 * block of chars out.
697
 *
698
 * Returns the number of bytes written, or -1 if lack of space, or -2
699
 *     if the transcoding failed.
700
 */
701
static int
702
UTF8ToUTF16(unsigned char* outb, int *outlen,
703
            const unsigned char* in, int *inlen)
704
660
{
705
660
    if (in == NULL) {
706
  /*
707
   * initialization, add the Byte Order Mark for UTF-16LE
708
   */
709
95
        if (*outlen >= 2) {
710
95
      outb[0] = 0xFF;
711
95
      outb[1] = 0xFE;
712
95
      *outlen = 2;
713
95
      *inlen = 0;
714
#ifdef DEBUG_ENCODING
715
            xmlGenericError(xmlGenericErrorContext,
716
        "Added FFFE Byte Order Mark\n");
717
#endif
718
95
      return(2);
719
95
  }
720
0
  *outlen = 0;
721
0
  *inlen = 0;
722
0
  return(0);
723
95
    }
724
565
    return (UTF8ToUTF16LE(outb, outlen, in, inlen));
725
660
}
726
#endif /* LIBXML_OUTPUT_ENABLED */
727
728
/**
729
 * UTF16BEToUTF8:
730
 * @out:  a pointer to an array of bytes to store the result
731
 * @outlen:  the length of @out
732
 * @inb:  a pointer to an array of UTF-16 passed as a byte array
733
 * @inlenb:  the length of @in in UTF-16 chars
734
 *
735
 * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
736
 * block of chars out. This function assumes the endian property
737
 * is the same between the native type of this machine and the
738
 * inputed one.
739
 *
740
 * Returns the number of bytes written, or -1 if lack of space, or -2
741
 *     if the transcoding fails (if *in is not a valid utf16 string)
742
 * The value of *inlen after return is the number of octets consumed
743
 *     if the return value is positive, else unpredictable.
744
 */
745
static int
746
UTF16BEToUTF8(unsigned char* out, int *outlen,
747
            const unsigned char* inb, int *inlenb)
748
4.64k
{
749
4.64k
    unsigned char* outstart = out;
750
4.64k
    const unsigned char* processed = inb;
751
4.64k
    unsigned char* outend;
752
4.64k
    unsigned short* in = (unsigned short*) inb;
753
4.64k
    unsigned short* inend;
754
4.64k
    unsigned int c, d, inlen;
755
4.64k
    unsigned char *tmp;
756
4.64k
    int bits;
757
758
4.64k
    if (*outlen == 0) {
759
0
        *inlenb = 0;
760
0
        return(0);
761
0
    }
762
4.64k
    outend = out + *outlen;
763
4.64k
    if ((*inlenb % 2) == 1)
764
1.30k
        (*inlenb)--;
765
4.64k
    inlen = *inlenb / 2;
766
4.64k
    inend= in + inlen;
767
295k
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
768
291k
  if (xmlLittleEndian) {
769
291k
      tmp = (unsigned char *) in;
770
291k
      c = *tmp++;
771
291k
      c = (c << 8) | *tmp;
772
291k
      in++;
773
291k
  } else {
774
0
      c= *in++;
775
0
  }
776
291k
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
777
359
      if (in >= inend) {           /* handle split mutli-byte characters */
778
161
                break;
779
161
      }
780
198
      if (xmlLittleEndian) {
781
198
    tmp = (unsigned char *) in;
782
198
    d = *tmp++;
783
198
    d = (d << 8) | *tmp;
784
198
    in++;
785
198
      } else {
786
0
    d= *in++;
787
0
      }
788
198
            if ((d & 0xFC00) == 0xDC00) {
789
60
                c &= 0x03FF;
790
60
                c <<= 10;
791
60
                c |= d & 0x03FF;
792
60
                c += 0x10000;
793
60
            }
794
138
            else {
795
138
    *outlen = out - outstart;
796
138
    *inlenb = processed - inb;
797
138
          return(-2);
798
138
      }
799
198
        }
800
801
  /* assertion: c is a single UTF-4 value */
802
291k
        if (out >= outend)
803
0
      break;
804
291k
        if      (c <    0x80) {  *out++=  c;                bits= -6; }
805
290k
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
806
289k
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
807
60
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
808
809
870k
        for ( ; bits >= 0; bits-= 6) {
810
579k
            if (out >= outend)
811
0
          break;
812
579k
            *out++= ((c >> bits) & 0x3F) | 0x80;
813
579k
        }
814
291k
  processed = (const unsigned char*) in;
815
291k
    }
816
4.50k
    *outlen = out - outstart;
817
4.50k
    *inlenb = processed - inb;
818
4.50k
    return(*outlen);
819
4.64k
}
820
821
#ifdef LIBXML_OUTPUT_ENABLED
822
/**
823
 * UTF8ToUTF16BE:
824
 * @outb:  a pointer to an array of bytes to store the result
825
 * @outlen:  the length of @outb
826
 * @in:  a pointer to an array of UTF-8 chars
827
 * @inlen:  the length of @in
828
 *
829
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
830
 * block of chars out.
831
 *
832
 * Returns the number of byte written, or -1 by lack of space, or -2
833
 *     if the transcoding failed.
834
 */
835
static int
836
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
837
            const unsigned char* in, int *inlen)
838
14
{
839
14
    unsigned short* out = (unsigned short*) outb;
840
14
    const unsigned char* processed = in;
841
14
    const unsigned char *const instart = in;
842
14
    unsigned short* outstart= out;
843
14
    unsigned short* outend;
844
14
    const unsigned char* inend;
845
14
    unsigned int c, d;
846
14
    int trailing;
847
14
    unsigned char *tmp;
848
14
    unsigned short tmp1, tmp2;
849
850
    /* UTF-16BE has no BOM */
851
14
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
852
14
    if (in == NULL) {
853
7
  *outlen = 0;
854
7
  *inlen = 0;
855
7
  return(0);
856
7
    }
857
7
    inend= in + *inlen;
858
7
    outend = out + (*outlen / 2);
859
301
    while (in < inend) {
860
294
      d= *in++;
861
294
      if      (d < 0x80)  { c= d; trailing= 0; }
862
0
      else if (d < 0xC0)  {
863
          /* trailing byte in leading position */
864
0
    *outlen = out - outstart;
865
0
    *inlen = processed - instart;
866
0
    return(-2);
867
0
      } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
868
0
      else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
869
0
      else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
870
0
      else {
871
          /* no chance for this in UTF-16 */
872
0
    *outlen = out - outstart;
873
0
    *inlen = processed - instart;
874
0
    return(-2);
875
0
      }
876
877
294
      if (inend - in < trailing) {
878
0
          break;
879
0
      }
880
881
294
      for ( ; trailing; trailing--) {
882
0
          if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))  break;
883
0
          c <<= 6;
884
0
          c |= d & 0x3F;
885
0
      }
886
887
      /* assertion: c is a single UTF-4 value */
888
294
        if (c < 0x10000) {
889
294
            if (out >= outend)  break;
890
294
      if (xmlLittleEndian) {
891
294
    tmp = (unsigned char *) out;
892
294
    *tmp = c >> 8;
893
294
    *(tmp + 1) = (unsigned char) c; /* Explicit truncation */
894
294
    out++;
895
294
      } else {
896
0
    *out++ = c;
897
0
      }
898
294
        }
899
0
        else if (c < 0x110000) {
900
0
            if (out+1 >= outend)  break;
901
0
            c -= 0x10000;
902
0
      if (xmlLittleEndian) {
903
0
    tmp1 = 0xD800 | (c >> 10);
904
0
    tmp = (unsigned char *) out;
905
0
    *tmp = tmp1 >> 8;
906
0
    *(tmp + 1) = (unsigned char) tmp1; /* Explicit truncation */
907
0
    out++;
908
909
0
    tmp2 = 0xDC00 | (c & 0x03FF);
910
0
    tmp = (unsigned char *) out;
911
0
    *tmp = tmp2 >> 8;
912
0
    *(tmp + 1) = (unsigned char) tmp2; /* Explicit truncation */
913
0
    out++;
914
0
      } else {
915
0
    *out++ = 0xD800 | (c >> 10);
916
0
    *out++ = 0xDC00 | (c & 0x03FF);
917
0
      }
918
0
        }
919
0
        else
920
0
      break;
921
294
  processed = in;
922
294
    }
923
7
    *outlen = (out - outstart) * 2;
924
7
    *inlen = processed - instart;
925
7
    return(*outlen);
926
7
}
927
#endif /* LIBXML_OUTPUT_ENABLED */
928
929
/************************************************************************
930
 *                  *
931
 *    Generic encoding handling routines      *
932
 *                  *
933
 ************************************************************************/
934
935
/**
936
 * xmlDetectCharEncoding:
937
 * @in:  a pointer to the first bytes of the XML entity, must be at least
938
 *       2 bytes long (at least 4 if encoding is UTF4 variant).
939
 * @len:  pointer to the length of the buffer
940
 *
941
 * Guess the encoding of the entity using the first bytes of the entity content
942
 * according to the non-normative appendix F of the XML-1.0 recommendation.
943
 *
944
 * Returns one of the XML_CHAR_ENCODING_... values.
945
 */
946
xmlCharEncoding
947
xmlDetectCharEncoding(const unsigned char* in, int len)
948
123k
{
949
123k
    if (in == NULL)
950
0
        return(XML_CHAR_ENCODING_NONE);
951
123k
    if (len >= 4) {
952
123k
  if ((in[0] == 0x00) && (in[1] == 0x00) &&
953
123k
      (in[2] == 0x00) && (in[3] == 0x3C))
954
39
      return(XML_CHAR_ENCODING_UCS4BE);
955
123k
  if ((in[0] == 0x3C) && (in[1] == 0x00) &&
956
123k
      (in[2] == 0x00) && (in[3] == 0x00))
957
54
      return(XML_CHAR_ENCODING_UCS4LE);
958
123k
  if ((in[0] == 0x00) && (in[1] == 0x00) &&
959
123k
      (in[2] == 0x3C) && (in[3] == 0x00))
960
21
      return(XML_CHAR_ENCODING_UCS4_2143);
961
123k
  if ((in[0] == 0x00) && (in[1] == 0x3C) &&
962
123k
      (in[2] == 0x00) && (in[3] == 0x00))
963
24
      return(XML_CHAR_ENCODING_UCS4_3412);
964
123k
  if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
965
123k
      (in[2] == 0xA7) && (in[3] == 0x94))
966
195
      return(XML_CHAR_ENCODING_EBCDIC);
967
123k
  if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
968
123k
      (in[2] == 0x78) && (in[3] == 0x6D))
969
48.7k
      return(XML_CHAR_ENCODING_UTF8);
970
  /*
971
   * Although not part of the recommendation, we also
972
   * attempt an "auto-recognition" of UTF-16LE and
973
   * UTF-16BE encodings.
974
   */
975
74.3k
  if ((in[0] == 0x3C) && (in[1] == 0x00) &&
976
74.3k
      (in[2] == 0x3F) && (in[3] == 0x00))
977
117
      return(XML_CHAR_ENCODING_UTF16LE);
978
74.1k
  if ((in[0] == 0x00) && (in[1] == 0x3C) &&
979
74.1k
      (in[2] == 0x00) && (in[3] == 0x3F))
980
66
      return(XML_CHAR_ENCODING_UTF16BE);
981
74.1k
    }
982
74.1k
    if (len >= 3) {
983
  /*
984
   * Errata on XML-1.0 June 20 2001
985
   * We now allow an UTF8 encoded BOM
986
   */
987
74.1k
  if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
988
74.1k
      (in[2] == 0xBF))
989
954
      return(XML_CHAR_ENCODING_UTF8);
990
74.1k
    }
991
    /* For UTF-16 we can recognize by the BOM */
992
73.1k
    if (len >= 2) {
993
73.1k
  if ((in[0] == 0xFE) && (in[1] == 0xFF))
994
399
      return(XML_CHAR_ENCODING_UTF16BE);
995
72.7k
  if ((in[0] == 0xFF) && (in[1] == 0xFE))
996
594
      return(XML_CHAR_ENCODING_UTF16LE);
997
72.7k
    }
998
72.1k
    return(XML_CHAR_ENCODING_NONE);
999
73.1k
}
1000
1001
/**
1002
 * xmlCleanupEncodingAliases:
1003
 *
1004
 * Unregisters all aliases
1005
 */
1006
void
1007
0
xmlCleanupEncodingAliases(void) {
1008
0
    int i;
1009
1010
0
    if (xmlCharEncodingAliases == NULL)
1011
0
  return;
1012
1013
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1014
0
  if (xmlCharEncodingAliases[i].name != NULL)
1015
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1016
0
  if (xmlCharEncodingAliases[i].alias != NULL)
1017
0
      xmlFree((char *) xmlCharEncodingAliases[i].alias);
1018
0
    }
1019
0
    xmlCharEncodingAliasesNb = 0;
1020
0
    xmlCharEncodingAliasesMax = 0;
1021
0
    xmlFree(xmlCharEncodingAliases);
1022
0
    xmlCharEncodingAliases = NULL;
1023
0
}
1024
1025
/**
1026
 * xmlGetEncodingAlias:
1027
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1028
 *
1029
 * Lookup an encoding name for the given alias.
1030
 *
1031
 * Returns NULL if not found, otherwise the original name
1032
 */
1033
const char *
1034
9.60k
xmlGetEncodingAlias(const char *alias) {
1035
9.60k
    int i;
1036
9.60k
    char upper[100];
1037
1038
9.60k
    if (alias == NULL)
1039
0
  return(NULL);
1040
1041
9.60k
    if (xmlCharEncodingAliases == NULL)
1042
9.60k
  return(NULL);
1043
1044
0
    for (i = 0;i < 99;i++) {
1045
0
        upper[i] = toupper(alias[i]);
1046
0
  if (upper[i] == 0) break;
1047
0
    }
1048
0
    upper[i] = 0;
1049
1050
    /*
1051
     * Walk down the list looking for a definition of the alias
1052
     */
1053
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1054
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1055
0
      return(xmlCharEncodingAliases[i].name);
1056
0
  }
1057
0
    }
1058
0
    return(NULL);
1059
0
}
1060
1061
/**
1062
 * xmlAddEncodingAlias:
1063
 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1064
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1065
 *
1066
 * Registers an alias @alias for an encoding named @name. Existing alias
1067
 * will be overwritten.
1068
 *
1069
 * Returns 0 in case of success, -1 in case of error
1070
 */
1071
int
1072
0
xmlAddEncodingAlias(const char *name, const char *alias) {
1073
0
    int i;
1074
0
    char upper[100];
1075
1076
0
    if ((name == NULL) || (alias == NULL))
1077
0
  return(-1);
1078
1079
0
    for (i = 0;i < 99;i++) {
1080
0
        upper[i] = toupper(alias[i]);
1081
0
  if (upper[i] == 0) break;
1082
0
    }
1083
0
    upper[i] = 0;
1084
1085
0
    if (xmlCharEncodingAliases == NULL) {
1086
0
  xmlCharEncodingAliasesNb = 0;
1087
0
  xmlCharEncodingAliasesMax = 20;
1088
0
  xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1089
0
        xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1090
0
  if (xmlCharEncodingAliases == NULL)
1091
0
      return(-1);
1092
0
    } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1093
0
  xmlCharEncodingAliasesMax *= 2;
1094
0
  xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1095
0
        xmlRealloc(xmlCharEncodingAliases,
1096
0
             xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1097
0
    }
1098
    /*
1099
     * Walk down the list looking for a definition of the alias
1100
     */
1101
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1102
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1103
      /*
1104
       * Replace the definition.
1105
       */
1106
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1107
0
      xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1108
0
      return(0);
1109
0
  }
1110
0
    }
1111
    /*
1112
     * Add the definition
1113
     */
1114
0
    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1115
0
    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1116
0
    xmlCharEncodingAliasesNb++;
1117
0
    return(0);
1118
0
}
1119
1120
/**
1121
 * xmlDelEncodingAlias:
1122
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1123
 *
1124
 * Unregisters an encoding alias @alias
1125
 *
1126
 * Returns 0 in case of success, -1 in case of error
1127
 */
1128
int
1129
0
xmlDelEncodingAlias(const char *alias) {
1130
0
    int i;
1131
1132
0
    if (alias == NULL)
1133
0
  return(-1);
1134
1135
0
    if (xmlCharEncodingAliases == NULL)
1136
0
  return(-1);
1137
    /*
1138
     * Walk down the list looking for a definition of the alias
1139
     */
1140
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1141
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1142
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1143
0
      xmlFree((char *) xmlCharEncodingAliases[i].alias);
1144
0
      xmlCharEncodingAliasesNb--;
1145
0
      memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1146
0
        sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1147
0
      return(0);
1148
0
  }
1149
0
    }
1150
0
    return(-1);
1151
0
}
1152
1153
/**
1154
 * xmlParseCharEncoding:
1155
 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1156
 *
1157
 * Compare the string to the encoding schemes already known. Note
1158
 * that the comparison is case insensitive accordingly to the section
1159
 * [XML] 4.3.3 Character Encoding in Entities.
1160
 *
1161
 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1162
 * if not recognized.
1163
 */
1164
xmlCharEncoding
1165
xmlParseCharEncoding(const char* name)
1166
19.3k
{
1167
19.3k
    const char *alias;
1168
19.3k
    char upper[500];
1169
19.3k
    int i;
1170
1171
19.3k
    if (name == NULL)
1172
17.1k
  return(XML_CHAR_ENCODING_NONE);
1173
1174
    /*
1175
     * Do the alias resolution
1176
     */
1177
2.22k
    alias = xmlGetEncodingAlias(name);
1178
2.22k
    if (alias != NULL)
1179
0
  name = alias;
1180
1181
29.9k
    for (i = 0;i < 499;i++) {
1182
29.9k
        upper[i] = toupper(name[i]);
1183
29.9k
  if (upper[i] == 0) break;
1184
29.9k
    }
1185
2.22k
    upper[i] = 0;
1186
1187
2.22k
    if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1188
2.22k
    if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1189
1.61k
    if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1190
1191
    /*
1192
     * NOTE: if we were able to parse this, the endianness of UTF16 is
1193
     *       already found and in use
1194
     */
1195
1.60k
    if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1196
1.50k
    if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1197
1198
1.48k
    if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1199
1.48k
    if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1200
1.48k
    if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1201
1202
    /*
1203
     * NOTE: if we were able to parse this, the endianness of UCS4 is
1204
     *       already found and in use
1205
     */
1206
1.48k
    if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1207
1.36k
    if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1208
1.36k
    if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1209
1210
1211
1.36k
    if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1212
916
    if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1213
916
    if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1214
1215
916
    if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1216
910
    if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1217
910
    if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1218
1219
910
    if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1220
905
    if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1221
901
    if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1222
860
    if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1223
854
    if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1224
847
    if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1225
844
    if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1226
1227
839
    if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1228
839
    if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1229
839
    if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1230
1231
#ifdef DEBUG_ENCODING
1232
    xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1233
#endif
1234
802
    return(XML_CHAR_ENCODING_ERROR);
1235
839
}
1236
1237
/**
1238
 * xmlGetCharEncodingName:
1239
 * @enc:  the encoding
1240
 *
1241
 * The "canonical" name for XML encoding.
1242
 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1243
 * Section 4.3.3  Character Encoding in Entities
1244
 *
1245
 * Returns the canonical name for the given encoding
1246
 */
1247
1248
const char*
1249
169
xmlGetCharEncodingName(xmlCharEncoding enc) {
1250
169
    switch (enc) {
1251
0
        case XML_CHAR_ENCODING_ERROR:
1252
0
      return(NULL);
1253
0
        case XML_CHAR_ENCODING_NONE:
1254
0
      return(NULL);
1255
0
        case XML_CHAR_ENCODING_UTF8:
1256
0
      return("UTF-8");
1257
0
        case XML_CHAR_ENCODING_UTF16LE:
1258
0
      return("UTF-16");
1259
0
        case XML_CHAR_ENCODING_UTF16BE:
1260
0
      return("UTF-16");
1261
0
        case XML_CHAR_ENCODING_EBCDIC:
1262
0
            return("EBCDIC");
1263
124
        case XML_CHAR_ENCODING_UCS4LE:
1264
124
            return("ISO-10646-UCS-4");
1265
0
        case XML_CHAR_ENCODING_UCS4BE:
1266
0
            return("ISO-10646-UCS-4");
1267
21
        case XML_CHAR_ENCODING_UCS4_2143:
1268
21
            return("ISO-10646-UCS-4");
1269
24
        case XML_CHAR_ENCODING_UCS4_3412:
1270
24
            return("ISO-10646-UCS-4");
1271
0
        case XML_CHAR_ENCODING_UCS2:
1272
0
            return("ISO-10646-UCS-2");
1273
0
        case XML_CHAR_ENCODING_8859_1:
1274
0
      return("ISO-8859-1");
1275
0
        case XML_CHAR_ENCODING_8859_2:
1276
0
      return("ISO-8859-2");
1277
0
        case XML_CHAR_ENCODING_8859_3:
1278
0
      return("ISO-8859-3");
1279
0
        case XML_CHAR_ENCODING_8859_4:
1280
0
      return("ISO-8859-4");
1281
0
        case XML_CHAR_ENCODING_8859_5:
1282
0
      return("ISO-8859-5");
1283
0
        case XML_CHAR_ENCODING_8859_6:
1284
0
      return("ISO-8859-6");
1285
0
        case XML_CHAR_ENCODING_8859_7:
1286
0
      return("ISO-8859-7");
1287
0
        case XML_CHAR_ENCODING_8859_8:
1288
0
      return("ISO-8859-8");
1289
0
        case XML_CHAR_ENCODING_8859_9:
1290
0
      return("ISO-8859-9");
1291
0
        case XML_CHAR_ENCODING_2022_JP:
1292
0
            return("ISO-2022-JP");
1293
0
        case XML_CHAR_ENCODING_SHIFT_JIS:
1294
0
            return("Shift-JIS");
1295
0
        case XML_CHAR_ENCODING_EUC_JP:
1296
0
            return("EUC-JP");
1297
0
  case XML_CHAR_ENCODING_ASCII:
1298
0
      return(NULL);
1299
169
    }
1300
0
    return(NULL);
1301
169
}
1302
1303
/************************************************************************
1304
 *                  *
1305
 *      Char encoding handlers        *
1306
 *                  *
1307
 ************************************************************************/
1308
1309
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) && \
1310
    defined(LIBXML_ISO8859X_ENABLED)
1311
1312
#define DECLARE_ISO_FUNCS(n) \
1313
    static int ISO8859_##n##ToUTF8(unsigned char* out, int *outlen, \
1314
                                   const unsigned char* in, int *inlen); \
1315
    static int UTF8ToISO8859_##n(unsigned char* out, int *outlen, \
1316
                                 const unsigned char* in, int *inlen);
1317
1318
/** DOC_DISABLE */
1319
DECLARE_ISO_FUNCS(2)
1320
DECLARE_ISO_FUNCS(3)
1321
DECLARE_ISO_FUNCS(4)
1322
DECLARE_ISO_FUNCS(5)
1323
DECLARE_ISO_FUNCS(6)
1324
DECLARE_ISO_FUNCS(7)
1325
DECLARE_ISO_FUNCS(8)
1326
DECLARE_ISO_FUNCS(9)
1327
DECLARE_ISO_FUNCS(10)
1328
DECLARE_ISO_FUNCS(11)
1329
DECLARE_ISO_FUNCS(13)
1330
DECLARE_ISO_FUNCS(14)
1331
DECLARE_ISO_FUNCS(15)
1332
DECLARE_ISO_FUNCS(16)
1333
/** DOC_ENABLE */
1334
1335
#endif /* LIBXML_ISO8859X_ENABLED */
1336
1337
#ifdef LIBXML_ICONV_ENABLED
1338
  #define EMPTY_ICONV , (iconv_t) 0, (iconv_t) 0
1339
#else
1340
  #define EMPTY_ICONV
1341
#endif
1342
1343
#ifdef LIBXML_UCONV_ENABLED
1344
  #define EMPTY_UCONV , NULL, NULL
1345
#else
1346
  #define EMPTY_UCONV
1347
#endif
1348
1349
#define MAKE_HANDLER(name, in, out) \
1350
    { (char *) name, in, out EMPTY_ICONV EMPTY_UCONV }
1351
1352
static const xmlCharEncodingHandler defaultHandlers[] = {
1353
    MAKE_HANDLER("UTF-8", UTF8ToUTF8, UTF8ToUTF8)
1354
#ifdef LIBXML_OUTPUT_ENABLED
1355
    ,MAKE_HANDLER("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE)
1356
    ,MAKE_HANDLER("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE)
1357
    ,MAKE_HANDLER("UTF-16", UTF16LEToUTF8, UTF8ToUTF16)
1358
    ,MAKE_HANDLER("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1)
1359
    ,MAKE_HANDLER("ASCII", asciiToUTF8, UTF8Toascii)
1360
    ,MAKE_HANDLER("US-ASCII", asciiToUTF8, UTF8Toascii)
1361
#ifdef LIBXML_HTML_ENABLED
1362
    ,MAKE_HANDLER("HTML", NULL, UTF8ToHtml)
1363
#endif
1364
#else
1365
    ,MAKE_HANDLER("UTF-16LE", UTF16LEToUTF8, NULL)
1366
    ,MAKE_HANDLER("UTF-16BE", UTF16BEToUTF8, NULL)
1367
    ,MAKE_HANDLER("UTF-16", UTF16LEToUTF8, NULL)
1368
    ,MAKE_HANDLER("ISO-8859-1", isolat1ToUTF8, NULL)
1369
    ,MAKE_HANDLER("ASCII", asciiToUTF8, NULL)
1370
    ,MAKE_HANDLER("US-ASCII", asciiToUTF8, NULL)
1371
#endif /* LIBXML_OUTPUT_ENABLED */
1372
1373
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) && \
1374
    defined(LIBXML_ISO8859X_ENABLED)
1375
    ,MAKE_HANDLER("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2)
1376
    ,MAKE_HANDLER("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3)
1377
    ,MAKE_HANDLER("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4)
1378
    ,MAKE_HANDLER("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5)
1379
    ,MAKE_HANDLER("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6)
1380
    ,MAKE_HANDLER("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7)
1381
    ,MAKE_HANDLER("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8)
1382
    ,MAKE_HANDLER("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9)
1383
    ,MAKE_HANDLER("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10)
1384
    ,MAKE_HANDLER("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11)
1385
    ,MAKE_HANDLER("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13)
1386
    ,MAKE_HANDLER("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14)
1387
    ,MAKE_HANDLER("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15)
1388
    ,MAKE_HANDLER("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16)
1389
#endif
1390
};
1391
1392
#define NUM_DEFAULT_HANDLERS \
1393
85.0k
    (sizeof(defaultHandlers) / sizeof(defaultHandlers[0]))
1394
1395
static const xmlCharEncodingHandler *xmlUTF16LEHandler = &defaultHandlers[1];
1396
static const xmlCharEncodingHandler *xmlUTF16BEHandler = &defaultHandlers[2];
1397
1398
/* the size should be growable, but it's not a big deal ... */
1399
0
#define MAX_ENCODING_HANDLERS 50
1400
static xmlCharEncodingHandlerPtr *handlers = NULL;
1401
static int nbCharEncodingHandler = 0;
1402
1403
/**
1404
 * xmlNewCharEncodingHandler:
1405
 * @name:  the encoding name, in UTF-8 format (ASCII actually)
1406
 * @input:  the xmlCharEncodingInputFunc to read that encoding
1407
 * @output:  the xmlCharEncodingOutputFunc to write that encoding
1408
 *
1409
 * Create and registers an xmlCharEncodingHandler.
1410
 *
1411
 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1412
 */
1413
xmlCharEncodingHandlerPtr
1414
xmlNewCharEncodingHandler(const char *name,
1415
                          xmlCharEncodingInputFunc input,
1416
0
                          xmlCharEncodingOutputFunc output) {
1417
0
    xmlCharEncodingHandlerPtr handler;
1418
0
    const char *alias;
1419
0
    char upper[500];
1420
0
    int i;
1421
0
    char *up = NULL;
1422
1423
    /*
1424
     * Do the alias resolution
1425
     */
1426
0
    alias = xmlGetEncodingAlias(name);
1427
0
    if (alias != NULL)
1428
0
  name = alias;
1429
1430
    /*
1431
     * Keep only the uppercase version of the encoding.
1432
     */
1433
0
    if (name == NULL) {
1434
0
        xmlEncodingErr(XML_I18N_NO_NAME,
1435
0
           "xmlNewCharEncodingHandler : no name !\n", NULL);
1436
0
  return(NULL);
1437
0
    }
1438
0
    for (i = 0;i < 499;i++) {
1439
0
        upper[i] = toupper(name[i]);
1440
0
  if (upper[i] == 0) break;
1441
0
    }
1442
0
    upper[i] = 0;
1443
0
    up = xmlMemStrdup(upper);
1444
0
    if (up == NULL) {
1445
0
        xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1446
0
  return(NULL);
1447
0
    }
1448
1449
    /*
1450
     * allocate and fill-up an handler block.
1451
     */
1452
0
    handler = (xmlCharEncodingHandlerPtr)
1453
0
              xmlMalloc(sizeof(xmlCharEncodingHandler));
1454
0
    if (handler == NULL) {
1455
0
        xmlFree(up);
1456
0
        xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1457
0
  return(NULL);
1458
0
    }
1459
0
    memset(handler, 0, sizeof(xmlCharEncodingHandler));
1460
0
    handler->input = input;
1461
0
    handler->output = output;
1462
0
    handler->name = up;
1463
1464
0
#ifdef LIBXML_ICONV_ENABLED
1465
0
    handler->iconv_in = NULL;
1466
0
    handler->iconv_out = NULL;
1467
0
#endif
1468
#ifdef LIBXML_ICU_ENABLED
1469
    handler->uconv_in = NULL;
1470
    handler->uconv_out = NULL;
1471
#endif
1472
1473
    /*
1474
     * registers and returns the handler.
1475
     */
1476
0
    xmlRegisterCharEncodingHandler(handler);
1477
#ifdef DEBUG_ENCODING
1478
    xmlGenericError(xmlGenericErrorContext,
1479
      "Registered encoding handler for %s\n", name);
1480
#endif
1481
0
    return(handler);
1482
0
}
1483
1484
/**
1485
 * xmlInitCharEncodingHandlers:
1486
 *
1487
 * DEPRECATED: Alias for xmlInitParser.
1488
 */
1489
void
1490
0
xmlInitCharEncodingHandlers(void) {
1491
0
    xmlInitParser();
1492
0
}
1493
1494
/**
1495
 * xmlInitEncodingInternal:
1496
 *
1497
 * Initialize the char encoding support.
1498
 */
1499
void
1500
50
xmlInitEncodingInternal(void) {
1501
50
    unsigned short int tst = 0x1234;
1502
50
    unsigned char *ptr = (unsigned char *) &tst;
1503
1504
50
    if (*ptr == 0x12) xmlLittleEndian = 0;
1505
50
    else if (*ptr == 0x34) xmlLittleEndian = 1;
1506
0
    else {
1507
0
        xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1508
0
                 "Odd problem at endianness detection\n", NULL);
1509
0
    }
1510
50
}
1511
1512
/**
1513
 * xmlCleanupCharEncodingHandlers:
1514
 *
1515
 * DEPRECATED: This function will be made private. Call xmlCleanupParser
1516
 * to free global state but see the warnings there. xmlCleanupParser
1517
 * should be only called once at program exit. In most cases, you don't
1518
 * have call cleanup functions at all.
1519
 *
1520
 * Cleanup the memory allocated for the char encoding support, it
1521
 * unregisters all the encoding handlers and the aliases.
1522
 */
1523
void
1524
0
xmlCleanupCharEncodingHandlers(void) {
1525
0
    xmlCleanupEncodingAliases();
1526
1527
0
    if (handlers == NULL) return;
1528
1529
0
    for (;nbCharEncodingHandler > 0;) {
1530
0
        nbCharEncodingHandler--;
1531
0
  if (handlers[nbCharEncodingHandler] != NULL) {
1532
0
      if (handlers[nbCharEncodingHandler]->name != NULL)
1533
0
    xmlFree(handlers[nbCharEncodingHandler]->name);
1534
0
      xmlFree(handlers[nbCharEncodingHandler]);
1535
0
  }
1536
0
    }
1537
0
    xmlFree(handlers);
1538
0
    handlers = NULL;
1539
0
    nbCharEncodingHandler = 0;
1540
0
}
1541
1542
/**
1543
 * xmlRegisterCharEncodingHandler:
1544
 * @handler:  the xmlCharEncodingHandlerPtr handler block
1545
 *
1546
 * Register the char encoding handler, surprising, isn't it ?
1547
 */
1548
void
1549
0
xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1550
0
    if (handler == NULL) {
1551
0
        xmlEncodingErr(XML_I18N_NO_HANDLER,
1552
0
    "xmlRegisterCharEncodingHandler: NULL handler\n", NULL);
1553
0
        return;
1554
0
    }
1555
0
    if (handlers == NULL) {
1556
0
        handlers = xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(handlers[0]));
1557
0
        if (handlers == NULL) {
1558
0
            xmlEncodingErrMemory("allocating handler table");
1559
0
            goto free_handler;
1560
0
        }
1561
0
    }
1562
1563
0
    if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1564
0
        xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1565
0
  "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1566
0
                 "MAX_ENCODING_HANDLERS");
1567
0
        goto free_handler;
1568
0
    }
1569
0
    handlers[nbCharEncodingHandler++] = handler;
1570
0
    return;
1571
1572
0
free_handler:
1573
0
    if (handler != NULL) {
1574
0
        if (handler->name != NULL) {
1575
0
            xmlFree(handler->name);
1576
0
        }
1577
0
        xmlFree(handler);
1578
0
    }
1579
0
}
1580
1581
/**
1582
 * xmlGetCharEncodingHandler:
1583
 * @enc:  an xmlCharEncoding value.
1584
 *
1585
 * Search in the registered set the handler able to read/write that encoding.
1586
 *
1587
 * Returns the handler or NULL if not found
1588
 */
1589
xmlCharEncodingHandlerPtr
1590
153k
xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1591
153k
    xmlCharEncodingHandlerPtr handler;
1592
1593
153k
    switch (enc) {
1594
0
        case XML_CHAR_ENCODING_ERROR:
1595
0
      return(NULL);
1596
135k
        case XML_CHAR_ENCODING_NONE:
1597
135k
      return(NULL);
1598
16.1k
        case XML_CHAR_ENCODING_UTF8:
1599
16.1k
      return(NULL);
1600
937
        case XML_CHAR_ENCODING_UTF16LE:
1601
937
      return((xmlCharEncodingHandlerPtr) xmlUTF16LEHandler);
1602
605
        case XML_CHAR_ENCODING_UTF16BE:
1603
605
      return((xmlCharEncodingHandlerPtr) xmlUTF16BEHandler);
1604
260
        case XML_CHAR_ENCODING_EBCDIC:
1605
260
            handler = xmlFindCharEncodingHandler("EBCDIC");
1606
260
            if (handler != NULL) return(handler);
1607
260
            handler = xmlFindCharEncodingHandler("ebcdic");
1608
260
            if (handler != NULL) return(handler);
1609
260
            handler = xmlFindCharEncodingHandler("EBCDIC-US");
1610
260
            if (handler != NULL) return(handler);
1611
0
            handler = xmlFindCharEncodingHandler("IBM-037");
1612
0
            if (handler != NULL) return(handler);
1613
0
      break;
1614
52
        case XML_CHAR_ENCODING_UCS4BE:
1615
52
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1616
52
            if (handler != NULL) return(handler);
1617
52
            handler = xmlFindCharEncodingHandler("UCS-4");
1618
52
            if (handler != NULL) return(handler);
1619
0
            handler = xmlFindCharEncodingHandler("UCS4");
1620
0
            if (handler != NULL) return(handler);
1621
0
      break;
1622
72
        case XML_CHAR_ENCODING_UCS4LE:
1623
72
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1624
72
            if (handler != NULL) return(handler);
1625
72
            handler = xmlFindCharEncodingHandler("UCS-4");
1626
72
            if (handler != NULL) return(handler);
1627
0
            handler = xmlFindCharEncodingHandler("UCS4");
1628
0
            if (handler != NULL) return(handler);
1629
0
      break;
1630
28
        case XML_CHAR_ENCODING_UCS4_2143:
1631
28
      break;
1632
32
        case XML_CHAR_ENCODING_UCS4_3412:
1633
32
      break;
1634
0
        case XML_CHAR_ENCODING_UCS2:
1635
0
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1636
0
            if (handler != NULL) return(handler);
1637
0
            handler = xmlFindCharEncodingHandler("UCS-2");
1638
0
            if (handler != NULL) return(handler);
1639
0
            handler = xmlFindCharEncodingHandler("UCS2");
1640
0
            if (handler != NULL) return(handler);
1641
0
      break;
1642
1643
      /*
1644
       * We used to keep ISO Latin encodings native in the
1645
       * generated data. This led to so many problems that
1646
       * this has been removed. One can still change this
1647
       * back by registering no-ops encoders for those
1648
       */
1649
0
        case XML_CHAR_ENCODING_8859_1:
1650
0
      handler = xmlFindCharEncodingHandler("ISO-8859-1");
1651
0
      if (handler != NULL) return(handler);
1652
0
      break;
1653
0
        case XML_CHAR_ENCODING_8859_2:
1654
0
      handler = xmlFindCharEncodingHandler("ISO-8859-2");
1655
0
      if (handler != NULL) return(handler);
1656
0
      break;
1657
0
        case XML_CHAR_ENCODING_8859_3:
1658
0
      handler = xmlFindCharEncodingHandler("ISO-8859-3");
1659
0
      if (handler != NULL) return(handler);
1660
0
      break;
1661
0
        case XML_CHAR_ENCODING_8859_4:
1662
0
      handler = xmlFindCharEncodingHandler("ISO-8859-4");
1663
0
      if (handler != NULL) return(handler);
1664
0
      break;
1665
0
        case XML_CHAR_ENCODING_8859_5:
1666
0
      handler = xmlFindCharEncodingHandler("ISO-8859-5");
1667
0
      if (handler != NULL) return(handler);
1668
0
      break;
1669
0
        case XML_CHAR_ENCODING_8859_6:
1670
0
      handler = xmlFindCharEncodingHandler("ISO-8859-6");
1671
0
      if (handler != NULL) return(handler);
1672
0
      break;
1673
0
        case XML_CHAR_ENCODING_8859_7:
1674
0
      handler = xmlFindCharEncodingHandler("ISO-8859-7");
1675
0
      if (handler != NULL) return(handler);
1676
0
      break;
1677
0
        case XML_CHAR_ENCODING_8859_8:
1678
0
      handler = xmlFindCharEncodingHandler("ISO-8859-8");
1679
0
      if (handler != NULL) return(handler);
1680
0
      break;
1681
0
        case XML_CHAR_ENCODING_8859_9:
1682
0
      handler = xmlFindCharEncodingHandler("ISO-8859-9");
1683
0
      if (handler != NULL) return(handler);
1684
0
      break;
1685
1686
1687
0
        case XML_CHAR_ENCODING_2022_JP:
1688
0
            handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1689
0
            if (handler != NULL) return(handler);
1690
0
      break;
1691
0
        case XML_CHAR_ENCODING_SHIFT_JIS:
1692
0
            handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1693
0
            if (handler != NULL) return(handler);
1694
0
            handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1695
0
            if (handler != NULL) return(handler);
1696
0
            handler = xmlFindCharEncodingHandler("Shift_JIS");
1697
0
            if (handler != NULL) return(handler);
1698
0
      break;
1699
0
        case XML_CHAR_ENCODING_EUC_JP:
1700
0
            handler = xmlFindCharEncodingHandler("EUC-JP");
1701
0
            if (handler != NULL) return(handler);
1702
0
      break;
1703
0
  default:
1704
0
      break;
1705
153k
    }
1706
1707
#ifdef DEBUG_ENCODING
1708
    xmlGenericError(xmlGenericErrorContext,
1709
      "No handler found for encoding %d\n", enc);
1710
#endif
1711
60
    return(NULL);
1712
153k
}
1713
1714
/**
1715
 * xmlFindCharEncodingHandler:
1716
 * @name:  a string describing the char encoding.
1717
 *
1718
 * Search in the registered set the handler able to read/write that encoding
1719
 * or create a new one.
1720
 *
1721
 * Returns the handler or NULL if not found
1722
 */
1723
xmlCharEncodingHandlerPtr
1724
7.37k
xmlFindCharEncodingHandler(const char *name) {
1725
7.37k
    const char *nalias;
1726
7.37k
    const char *norig;
1727
7.37k
    xmlCharEncoding alias;
1728
7.37k
#ifdef LIBXML_ICONV_ENABLED
1729
7.37k
    xmlCharEncodingHandlerPtr enc;
1730
7.37k
    iconv_t icv_in, icv_out;
1731
7.37k
#endif /* LIBXML_ICONV_ENABLED */
1732
#ifdef LIBXML_ICU_ENABLED
1733
    xmlCharEncodingHandlerPtr encu;
1734
    uconv_t *ucv_in, *ucv_out;
1735
#endif /* LIBXML_ICU_ENABLED */
1736
7.37k
    char upper[100];
1737
7.37k
    int i;
1738
1739
7.37k
    if (name == NULL) return(NULL);
1740
7.37k
    if (name[0] == 0) return(NULL);
1741
1742
    /*
1743
     * Do the alias resolution
1744
     */
1745
7.37k
    norig = name;
1746
7.37k
    nalias = xmlGetEncodingAlias(name);
1747
7.37k
    if (nalias != NULL)
1748
0
  name = nalias;
1749
1750
    /*
1751
     * Check first for directly registered encoding names
1752
     */
1753
74.1k
    for (i = 0;i < 99;i++) {
1754
74.0k
        upper[i] = toupper(name[i]);
1755
74.0k
  if (upper[i] == 0) break;
1756
74.0k
    }
1757
7.37k
    upper[i] = 0;
1758
1759
44.6k
    for (i = 0; i < (int) NUM_DEFAULT_HANDLERS; i++) {
1760
42.2k
        if (strcmp(upper, defaultHandlers[i].name) == 0)
1761
5.03k
            return((xmlCharEncodingHandlerPtr) &defaultHandlers[i]);
1762
42.2k
    }
1763
1764
2.33k
    if (handlers != NULL) {
1765
0
        for (i = 0;i < nbCharEncodingHandler; i++) {
1766
0
            if (!strcmp(upper, handlers[i]->name)) {
1767
#ifdef DEBUG_ENCODING
1768
                xmlGenericError(xmlGenericErrorContext,
1769
                        "Found registered handler for encoding %s\n", name);
1770
#endif
1771
0
                return(handlers[i]);
1772
0
            }
1773
0
        }
1774
0
    }
1775
1776
2.33k
#ifdef LIBXML_ICONV_ENABLED
1777
    /* check whether iconv can handle this */
1778
2.33k
    icv_in = iconv_open("UTF-8", name);
1779
2.33k
    icv_out = iconv_open(name, "UTF-8");
1780
2.33k
    if (icv_in == (iconv_t) -1) {
1781
777
        icv_in = iconv_open("UTF-8", upper);
1782
777
    }
1783
2.33k
    if (icv_out == (iconv_t) -1) {
1784
777
  icv_out = iconv_open(upper, "UTF-8");
1785
777
    }
1786
2.33k
    if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1787
1.56k
      enc = (xmlCharEncodingHandlerPtr)
1788
1.56k
            xmlMalloc(sizeof(xmlCharEncodingHandler));
1789
1.56k
      if (enc == NULL) {
1790
0
          iconv_close(icv_in);
1791
0
          iconv_close(icv_out);
1792
0
    return(NULL);
1793
0
      }
1794
1.56k
            memset(enc, 0, sizeof(xmlCharEncodingHandler));
1795
1.56k
      enc->name = xmlMemStrdup(name);
1796
1.56k
      enc->input = NULL;
1797
1.56k
      enc->output = NULL;
1798
1.56k
      enc->iconv_in = icv_in;
1799
1.56k
      enc->iconv_out = icv_out;
1800
#ifdef DEBUG_ENCODING
1801
            xmlGenericError(xmlGenericErrorContext,
1802
        "Found iconv handler for encoding %s\n", name);
1803
#endif
1804
1.56k
      return enc;
1805
1.56k
    } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1806
0
      xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1807
0
        "iconv : problems with filters for '%s'\n", name);
1808
0
      if (icv_in != (iconv_t) -1)
1809
0
    iconv_close(icv_in);
1810
0
      else
1811
0
    iconv_close(icv_out);
1812
0
    }
1813
777
#endif /* LIBXML_ICONV_ENABLED */
1814
#ifdef LIBXML_ICU_ENABLED
1815
    /* check whether icu can handle this */
1816
    ucv_in = openIcuConverter(name, 1);
1817
    ucv_out = openIcuConverter(name, 0);
1818
    if (ucv_in != NULL && ucv_out != NULL) {
1819
      encu = (xmlCharEncodingHandlerPtr)
1820
             xmlMalloc(sizeof(xmlCharEncodingHandler));
1821
      if (encu == NULL) {
1822
                closeIcuConverter(ucv_in);
1823
                closeIcuConverter(ucv_out);
1824
    return(NULL);
1825
      }
1826
            memset(encu, 0, sizeof(xmlCharEncodingHandler));
1827
      encu->name = xmlMemStrdup(name);
1828
      encu->input = NULL;
1829
      encu->output = NULL;
1830
      encu->uconv_in = ucv_in;
1831
      encu->uconv_out = ucv_out;
1832
#ifdef DEBUG_ENCODING
1833
            xmlGenericError(xmlGenericErrorContext,
1834
        "Found ICU converter handler for encoding %s\n", name);
1835
#endif
1836
      return encu;
1837
    } else if (ucv_in != NULL || ucv_out != NULL) {
1838
            closeIcuConverter(ucv_in);
1839
            closeIcuConverter(ucv_out);
1840
      xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1841
        "ICU converter : problems with filters for '%s'\n", name);
1842
    }
1843
#endif /* LIBXML_ICU_ENABLED */
1844
1845
#ifdef DEBUG_ENCODING
1846
    xmlGenericError(xmlGenericErrorContext,
1847
      "No handler found for encoding %s\n", name);
1848
#endif
1849
1850
    /*
1851
     * Fallback using the canonical names
1852
     */
1853
777
    alias = xmlParseCharEncoding(norig);
1854
777
    if (alias != XML_CHAR_ENCODING_ERROR) {
1855
124
        const char* canon;
1856
124
        canon = xmlGetCharEncodingName(alias);
1857
124
        if ((canon != NULL) && (strcmp(name, canon))) {
1858
0
      return(xmlFindCharEncodingHandler(canon));
1859
0
        }
1860
124
    }
1861
1862
    /* If "none of the above", give up */
1863
777
    return(NULL);
1864
777
}
1865
1866
/************************************************************************
1867
 *                  *
1868
 *    ICONV based generic conversion functions    *
1869
 *                  *
1870
 ************************************************************************/
1871
1872
#ifdef LIBXML_ICONV_ENABLED
1873
/**
1874
 * xmlIconvWrapper:
1875
 * @cd:   iconv converter data structure
1876
 * @out:  a pointer to an array of bytes to store the result
1877
 * @outlen:  the length of @out
1878
 * @in:  a pointer to an array of input bytes
1879
 * @inlen:  the length of @in
1880
 *
1881
 * Returns 0 if success, or
1882
 *     -1 by lack of space, or
1883
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
1884
 *        the result of transformation can't fit into the encoding we want), or
1885
 *     -3 if there the last byte can't form a single output char.
1886
 *
1887
 * The value of @inlen after return is the number of octets consumed
1888
 *     as the return value is positive, else unpredictable.
1889
 * The value of @outlen after return is the number of octets produced.
1890
 */
1891
static int
1892
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
1893
12.1k
                const unsigned char *in, int *inlen) {
1894
12.1k
    size_t icv_inlen, icv_outlen;
1895
12.1k
    const char *icv_in = (const char *) in;
1896
12.1k
    char *icv_out = (char *) out;
1897
12.1k
    size_t ret;
1898
1899
12.1k
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1900
168
        if (outlen != NULL) *outlen = 0;
1901
168
        return(-1);
1902
168
    }
1903
12.0k
    icv_inlen = *inlen;
1904
12.0k
    icv_outlen = *outlen;
1905
    /*
1906
     * Some versions take const, other versions take non-const input.
1907
     */
1908
12.0k
    ret = iconv(cd, (void *) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
1909
12.0k
    *inlen -= icv_inlen;
1910
12.0k
    *outlen -= icv_outlen;
1911
12.0k
    if ((icv_inlen != 0) || (ret == (size_t) -1)) {
1912
2.15k
#ifdef EILSEQ
1913
2.15k
        if (errno == EILSEQ) {
1914
896
            return -2;
1915
896
        } else
1916
1.25k
#endif
1917
1.25k
#ifdef E2BIG
1918
1.25k
        if (errno == E2BIG) {
1919
28
            return -1;
1920
28
        } else
1921
1.22k
#endif
1922
1.22k
#ifdef EINVAL
1923
1.22k
        if (errno == EINVAL) {
1924
1.22k
            return -3;
1925
1.22k
        } else
1926
0
#endif
1927
0
        {
1928
0
            return -3;
1929
0
        }
1930
2.15k
    }
1931
9.86k
    return 0;
1932
12.0k
}
1933
#endif /* LIBXML_ICONV_ENABLED */
1934
1935
/************************************************************************
1936
 *                  *
1937
 *    ICU based generic conversion functions    *
1938
 *                  *
1939
 ************************************************************************/
1940
1941
#ifdef LIBXML_ICU_ENABLED
1942
/**
1943
 * xmlUconvWrapper:
1944
 * @cd: ICU uconverter data structure
1945
 * @toUnicode : non-zero if toUnicode. 0 otherwise.
1946
 * @out:  a pointer to an array of bytes to store the result
1947
 * @outlen:  the length of @out
1948
 * @in:  a pointer to an array of input bytes
1949
 * @inlen:  the length of @in
1950
 * @flush: if true, indicates end of input
1951
 *
1952
 * Returns 0 if success, or
1953
 *     -1 by lack of space, or
1954
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
1955
 *        the result of transformation can't fit into the encoding we want), or
1956
 *     -3 if there the last byte can't form a single output char.
1957
 *
1958
 * The value of @inlen after return is the number of octets consumed
1959
 *     as the return value is positive, else unpredictable.
1960
 * The value of @outlen after return is the number of octets produced.
1961
 */
1962
static int
1963
xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
1964
                const unsigned char *in, int *inlen, int flush) {
1965
    const char *ucv_in = (const char *) in;
1966
    char *ucv_out = (char *) out;
1967
    UErrorCode err = U_ZERO_ERROR;
1968
1969
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1970
        if (outlen != NULL) *outlen = 0;
1971
        return(-1);
1972
    }
1973
1974
    if (toUnicode) {
1975
        /* encoding => UTF-16 => UTF-8 */
1976
        ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
1977
                       &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1978
                       &cd->pivot_source, &cd->pivot_target,
1979
                       cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1980
    } else {
1981
        /* UTF-8 => UTF-16 => encoding */
1982
        ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
1983
                       &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1984
                       &cd->pivot_source, &cd->pivot_target,
1985
                       cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1986
    }
1987
    *inlen = ucv_in - (const char*) in;
1988
    *outlen = ucv_out - (char *) out;
1989
    if (U_SUCCESS(err)) {
1990
        /* reset pivot buf if this is the last call for input (flush==TRUE) */
1991
        if (flush)
1992
            cd->pivot_source = cd->pivot_target = cd->pivot_buf;
1993
        return 0;
1994
    }
1995
    if (err == U_BUFFER_OVERFLOW_ERROR)
1996
        return -1;
1997
    if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
1998
        return -2;
1999
    return -3;
2000
}
2001
#endif /* LIBXML_ICU_ENABLED */
2002
2003
/************************************************************************
2004
 *                  *
2005
 *    The real API used by libxml for on-the-fly conversion *
2006
 *                  *
2007
 ************************************************************************/
2008
2009
/**
2010
 * xmlEncInputChunk:
2011
 * @handler:  encoding handler
2012
 * @out:  a pointer to an array of bytes to store the result
2013
 * @outlen:  the length of @out
2014
 * @in:  a pointer to an array of input bytes
2015
 * @inlen:  the length of @in
2016
 * @flush:  flush (ICU-related)
2017
 *
2018
 * Returns 0 if success, or
2019
 *     -1 by lack of space, or
2020
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2021
 *        the result of transformation can't fit into the encoding we want), or
2022
 *     -3 if there the last byte can't form a single output char.
2023
 *
2024
 * The value of @inlen after return is the number of octets consumed
2025
 *     as the return value is 0, else unpredictable.
2026
 * The value of @outlen after return is the number of octets produced.
2027
 */
2028
static int
2029
xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2030
168k
                 int *outlen, const unsigned char *in, int *inlen, int flush) {
2031
168k
    int ret;
2032
168k
    (void)flush;
2033
2034
168k
    if (handler->input != NULL) {
2035
157k
        ret = handler->input(out, outlen, in, inlen);
2036
157k
        if (ret > 0)
2037
135k
           ret = 0;
2038
157k
    }
2039
11.4k
#ifdef LIBXML_ICONV_ENABLED
2040
11.4k
    else if (handler->iconv_in != NULL) {
2041
11.4k
        ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
2042
11.4k
    }
2043
3
#endif /* LIBXML_ICONV_ENABLED */
2044
#ifdef LIBXML_ICU_ENABLED
2045
    else if (handler->uconv_in != NULL) {
2046
        ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen,
2047
                              flush);
2048
    }
2049
#endif /* LIBXML_ICU_ENABLED */
2050
3
    else {
2051
3
        *outlen = 0;
2052
3
        *inlen = 0;
2053
3
        ret = -2;
2054
3
    }
2055
2056
168k
    return(ret);
2057
168k
}
2058
2059
/**
2060
 * xmlEncOutputChunk:
2061
 * @handler:  encoding handler
2062
 * @out:  a pointer to an array of bytes to store the result
2063
 * @outlen:  the length of @out
2064
 * @in:  a pointer to an array of input bytes
2065
 * @inlen:  the length of @in
2066
 *
2067
 * Returns 0 if success, or
2068
 *     -1 by lack of space, or
2069
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2070
 *        the result of transformation can't fit into the encoding we want), or
2071
 *     -3 if there the last byte can't form a single output char.
2072
 *     -4 if no output function was found.
2073
 *
2074
 * The value of @inlen after return is the number of octets consumed
2075
 *     as the return value is 0, else unpredictable.
2076
 * The value of @outlen after return is the number of octets produced.
2077
 */
2078
static int
2079
xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2080
7.34k
                  int *outlen, const unsigned char *in, int *inlen) {
2081
7.34k
    int ret;
2082
2083
7.34k
    if (handler->output != NULL) {
2084
6.63k
        ret = handler->output(out, outlen, in, inlen);
2085
6.63k
        if (ret > 0)
2086
4.85k
           ret = 0;
2087
6.63k
    }
2088
714
#ifdef LIBXML_ICONV_ENABLED
2089
714
    else if (handler->iconv_out != NULL) {
2090
714
        ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
2091
714
    }
2092
0
#endif /* LIBXML_ICONV_ENABLED */
2093
#ifdef LIBXML_ICU_ENABLED
2094
    else if (handler->uconv_out != NULL) {
2095
        ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen,
2096
                              1);
2097
    }
2098
#endif /* LIBXML_ICU_ENABLED */
2099
0
    else {
2100
0
        *outlen = 0;
2101
0
        *inlen = 0;
2102
0
        ret = -4;
2103
0
    }
2104
2105
7.34k
    return(ret);
2106
7.34k
}
2107
2108
/**
2109
 * xmlCharEncFirstLine:
2110
 * @handler:  char encoding transformation data structure
2111
 * @out:  an xmlBuffer for the output.
2112
 * @in:  an xmlBuffer for the input
2113
 *
2114
 * Front-end for the encoding handler input function, but handle only
2115
 * the very first line, i.e. limit itself to 45 chars.
2116
 *
2117
 * Returns the number of byte written if success, or
2118
 *     -1 general error
2119
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2120
 *        the result of transformation can't fit into the encoding we want), or
2121
 */
2122
int
2123
xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2124
0
                    xmlBufferPtr in) {
2125
0
    int ret;
2126
0
    int written;
2127
0
    int toconv;
2128
2129
0
    if (handler == NULL) return(-1);
2130
0
    if (out == NULL) return(-1);
2131
0
    if (in == NULL) return(-1);
2132
2133
    /* calculate space available */
2134
0
    written = out->size - out->use - 1; /* count '\0' */
2135
0
    toconv = in->use;
2136
    /*
2137
     * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2138
     * 45 chars should be sufficient to reach the end of the encoding
2139
     * declaration without going too far inside the document content.
2140
     * on UTF-16 this means 90bytes, on UCS4 this means 180
2141
     * The actual value depending on guessed encoding is passed as @len
2142
     * if provided
2143
     */
2144
0
    if (toconv > 180)
2145
0
        toconv = 180;
2146
0
    if (toconv * 2 >= written) {
2147
0
        xmlBufferGrow(out, toconv * 2);
2148
0
  written = out->size - out->use - 1;
2149
0
    }
2150
2151
0
    ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2152
0
                           in->content, &toconv, 0);
2153
0
    xmlBufferShrink(in, toconv);
2154
0
    out->use += written;
2155
0
    out->content[out->use] = 0;
2156
0
    if (ret == -1) ret = -3;
2157
2158
#ifdef DEBUG_ENCODING
2159
    switch (ret) {
2160
        case 0:
2161
      xmlGenericError(xmlGenericErrorContext,
2162
        "converted %d bytes to %d bytes of input\n",
2163
              toconv, written);
2164
      break;
2165
        case -1:
2166
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2167
              toconv, written, in->use);
2168
      break;
2169
        case -2:
2170
      xmlGenericError(xmlGenericErrorContext,
2171
        "input conversion failed due to input error\n");
2172
      break;
2173
        case -3:
2174
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2175
              toconv, written, in->use);
2176
      break;
2177
  default:
2178
      xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2179
    }
2180
#endif /* DEBUG_ENCODING */
2181
    /*
2182
     * Ignore when input buffer is not on a boundary
2183
     */
2184
0
    if (ret == -3) ret = 0;
2185
0
    if (ret == -1) ret = 0;
2186
0
    return(written ? written : ret);
2187
0
}
2188
2189
/**
2190
 * xmlCharEncFirstLineInput:
2191
 * @input: a parser input buffer
2192
 * @len:  number of bytes to convert for the first line, or -1
2193
 *
2194
 * Front-end for the encoding handler input function, but handle only
2195
 * the very first line. Point is that this is based on autodetection
2196
 * of the encoding and once that first line is converted we may find
2197
 * out that a different decoder is needed to process the input.
2198
 *
2199
 * Returns the number of byte written if success, or
2200
 *     -1 general error
2201
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2202
 *        the result of transformation can't fit into the encoding we want), or
2203
 */
2204
int
2205
xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
2206
5.69k
{
2207
5.69k
    int ret;
2208
5.69k
    size_t written;
2209
5.69k
    size_t toconv;
2210
5.69k
    int c_in;
2211
5.69k
    int c_out;
2212
5.69k
    xmlBufPtr in;
2213
5.69k
    xmlBufPtr out;
2214
2215
5.69k
    if ((input == NULL) || (input->encoder == NULL) ||
2216
5.69k
        (input->buffer == NULL) || (input->raw == NULL))
2217
0
        return (-1);
2218
5.69k
    out = input->buffer;
2219
5.69k
    in = input->raw;
2220
2221
5.69k
    toconv = xmlBufUse(in);
2222
5.69k
    if (toconv == 0)
2223
15
        return (0);
2224
5.67k
    written = xmlBufAvail(out);
2225
    /*
2226
     * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2227
     * 45 chars should be sufficient to reach the end of the encoding
2228
     * declaration without going too far inside the document content.
2229
     * on UTF-16 this means 90bytes, on UCS4 this means 180
2230
     * The actual value depending on guessed encoding is passed as @len
2231
     * if provided
2232
     */
2233
5.67k
    if (len >= 0) {
2234
1.00k
        if (toconv > (unsigned int) len)
2235
688
            toconv = len;
2236
4.67k
    } else {
2237
4.67k
        if (toconv > 180)
2238
2.86k
            toconv = 180;
2239
4.67k
    }
2240
5.67k
    if (toconv * 2 >= written) {
2241
0
        xmlBufGrow(out, toconv * 2);
2242
0
        written = xmlBufAvail(out);
2243
0
    }
2244
5.67k
    if (written > 360)
2245
5.67k
        written = 360;
2246
2247
5.67k
    c_in = toconv;
2248
5.67k
    c_out = written;
2249
5.67k
    ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2250
5.67k
                           xmlBufContent(in), &c_in, 0);
2251
5.67k
    xmlBufShrink(in, c_in);
2252
5.67k
    xmlBufAddLen(out, c_out);
2253
5.67k
    if (ret == -1)
2254
253
        ret = -3;
2255
2256
5.67k
    switch (ret) {
2257
5.09k
        case 0:
2258
#ifdef DEBUG_ENCODING
2259
            xmlGenericError(xmlGenericErrorContext,
2260
                            "converted %d bytes to %d bytes of input\n",
2261
                            c_in, c_out);
2262
#endif
2263
5.09k
            break;
2264
0
        case -1:
2265
#ifdef DEBUG_ENCODING
2266
            xmlGenericError(xmlGenericErrorContext,
2267
                         "converted %d bytes to %d bytes of input, %d left\n",
2268
                            c_in, c_out, (int)xmlBufUse(in));
2269
#endif
2270
0
            break;
2271
333
        case -3:
2272
#ifdef DEBUG_ENCODING
2273
            xmlGenericError(xmlGenericErrorContext,
2274
                        "converted %d bytes to %d bytes of input, %d left\n",
2275
                            c_in, c_out, (int)xmlBufUse(in));
2276
#endif
2277
333
            break;
2278
253
        case -2: {
2279
253
            char buf[50];
2280
253
            const xmlChar *content = xmlBufContent(in);
2281
2282
253
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2283
253
         content[0], content[1],
2284
253
         content[2], content[3]);
2285
253
      buf[49] = 0;
2286
253
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2287
253
        "input conversion failed due to input error, bytes %s\n",
2288
253
               buf);
2289
253
        }
2290
5.67k
    }
2291
    /*
2292
     * Ignore when input buffer is not on a boundary
2293
     */
2294
5.67k
    if (ret == -3) ret = 0;
2295
5.67k
    if (ret == -1) ret = 0;
2296
5.67k
    return(c_out ? c_out : ret);
2297
5.67k
}
2298
2299
/**
2300
 * xmlCharEncInput:
2301
 * @input: a parser input buffer
2302
 * @flush: try to flush all the raw buffer
2303
 *
2304
 * Generic front-end for the encoding handler on parser input
2305
 *
2306
 * Returns the number of byte written if success, or
2307
 *     -1 general error
2308
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2309
 *        the result of transformation can't fit into the encoding we want), or
2310
 */
2311
int
2312
xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
2313
215k
{
2314
215k
    int ret;
2315
215k
    size_t written;
2316
215k
    size_t toconv;
2317
215k
    int c_in;
2318
215k
    int c_out;
2319
215k
    xmlBufPtr in;
2320
215k
    xmlBufPtr out;
2321
2322
215k
    if ((input == NULL) || (input->encoder == NULL) ||
2323
215k
        (input->buffer == NULL) || (input->raw == NULL))
2324
0
        return (-1);
2325
215k
    out = input->buffer;
2326
215k
    in = input->raw;
2327
2328
215k
    toconv = xmlBufUse(in);
2329
215k
    if (toconv == 0)
2330
52.6k
        return (0);
2331
163k
    if ((toconv > 64 * 1024) && (flush == 0))
2332
0
        toconv = 64 * 1024;
2333
163k
    written = xmlBufAvail(out);
2334
163k
    if (toconv * 2 >= written) {
2335
4.50k
        if (xmlBufGrow(out, toconv * 2) < 0)
2336
0
            return (-1);
2337
4.50k
        written = xmlBufAvail(out);
2338
4.50k
    }
2339
163k
    if ((written > 128 * 1024) && (flush == 0))
2340
0
        written = 128 * 1024;
2341
2342
163k
    c_in = toconv;
2343
163k
    c_out = written;
2344
163k
    ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2345
163k
                           xmlBufContent(in), &c_in, flush);
2346
163k
    xmlBufShrink(in, c_in);
2347
163k
    xmlBufAddLen(out, c_out);
2348
163k
    if (ret == -1)
2349
17.7k
        ret = -3;
2350
2351
163k
    switch (ret) {
2352
143k
        case 0:
2353
#ifdef DEBUG_ENCODING
2354
            xmlGenericError(xmlGenericErrorContext,
2355
                            "converted %d bytes to %d bytes of input\n",
2356
                            c_in, c_out);
2357
#endif
2358
143k
            break;
2359
0
        case -1:
2360
#ifdef DEBUG_ENCODING
2361
            xmlGenericError(xmlGenericErrorContext,
2362
                         "converted %d bytes to %d bytes of input, %d left\n",
2363
                            c_in, c_out, (int)xmlBufUse(in));
2364
#endif
2365
0
            break;
2366
18.8k
        case -3:
2367
#ifdef DEBUG_ENCODING
2368
            xmlGenericError(xmlGenericErrorContext,
2369
                        "converted %d bytes to %d bytes of input, %d left\n",
2370
                            c_in, c_out, (int)xmlBufUse(in));
2371
#endif
2372
18.8k
            break;
2373
754
        case -2: {
2374
754
            char buf[50];
2375
754
            const xmlChar *content = xmlBufContent(in);
2376
2377
754
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2378
754
         content[0], content[1],
2379
754
         content[2], content[3]);
2380
754
      buf[49] = 0;
2381
754
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2382
754
        "input conversion failed due to input error, bytes %s\n",
2383
754
               buf);
2384
754
        }
2385
163k
    }
2386
    /*
2387
     * Ignore when input buffer is not on a boundary
2388
     */
2389
163k
    if (ret == -3)
2390
18.8k
        ret = 0;
2391
163k
    return (c_out? c_out : ret);
2392
163k
}
2393
2394
/**
2395
 * xmlCharEncInFunc:
2396
 * @handler:  char encoding transformation data structure
2397
 * @out:  an xmlBuffer for the output.
2398
 * @in:  an xmlBuffer for the input
2399
 *
2400
 * Generic front-end for the encoding handler input function
2401
 *
2402
 * Returns the number of byte written if success, or
2403
 *     -1 general error
2404
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2405
 *        the result of transformation can't fit into the encoding we want), or
2406
 */
2407
int
2408
xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2409
                 xmlBufferPtr in)
2410
0
{
2411
0
    int ret;
2412
0
    int written;
2413
0
    int toconv;
2414
2415
0
    if (handler == NULL)
2416
0
        return (-1);
2417
0
    if (out == NULL)
2418
0
        return (-1);
2419
0
    if (in == NULL)
2420
0
        return (-1);
2421
2422
0
    toconv = in->use;
2423
0
    if (toconv == 0)
2424
0
        return (0);
2425
0
    written = out->size - out->use -1; /* count '\0' */
2426
0
    if (toconv * 2 >= written) {
2427
0
        xmlBufferGrow(out, out->size + toconv * 2);
2428
0
        written = out->size - out->use - 1;
2429
0
    }
2430
0
    ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2431
0
                           in->content, &toconv, 1);
2432
0
    xmlBufferShrink(in, toconv);
2433
0
    out->use += written;
2434
0
    out->content[out->use] = 0;
2435
0
    if (ret == -1)
2436
0
        ret = -3;
2437
2438
0
    switch (ret) {
2439
0
        case 0:
2440
#ifdef DEBUG_ENCODING
2441
            xmlGenericError(xmlGenericErrorContext,
2442
                            "converted %d bytes to %d bytes of input\n",
2443
                            toconv, written);
2444
#endif
2445
0
            break;
2446
0
        case -1:
2447
#ifdef DEBUG_ENCODING
2448
            xmlGenericError(xmlGenericErrorContext,
2449
                         "converted %d bytes to %d bytes of input, %d left\n",
2450
                            toconv, written, in->use);
2451
#endif
2452
0
            break;
2453
0
        case -3:
2454
#ifdef DEBUG_ENCODING
2455
            xmlGenericError(xmlGenericErrorContext,
2456
                        "converted %d bytes to %d bytes of input, %d left\n",
2457
                            toconv, written, in->use);
2458
#endif
2459
0
            break;
2460
0
        case -2: {
2461
0
            char buf[50];
2462
2463
0
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2464
0
         in->content[0], in->content[1],
2465
0
         in->content[2], in->content[3]);
2466
0
      buf[49] = 0;
2467
0
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2468
0
        "input conversion failed due to input error, bytes %s\n",
2469
0
               buf);
2470
0
        }
2471
0
    }
2472
    /*
2473
     * Ignore when input buffer is not on a boundary
2474
     */
2475
0
    if (ret == -3)
2476
0
        ret = 0;
2477
0
    return (written? written : ret);
2478
0
}
2479
2480
#ifdef LIBXML_OUTPUT_ENABLED
2481
/**
2482
 * xmlCharEncOutput:
2483
 * @output: a parser output buffer
2484
 * @init: is this an initialization call without data
2485
 *
2486
 * Generic front-end for the encoding handler on parser output
2487
 * a first call with @init == 1 has to be made first to initiate the
2488
 * output in case of non-stateless encoding needing to initiate their
2489
 * state or the output (like the BOM in UTF16).
2490
 * In case of UTF8 sequence conversion errors for the given encoder,
2491
 * the content will be automatically remapped to a CharRef sequence.
2492
 *
2493
 * Returns the number of byte written if success, or
2494
 *     -1 general error
2495
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2496
 *        the result of transformation can't fit into the encoding we want), or
2497
 */
2498
int
2499
xmlCharEncOutput(xmlOutputBufferPtr output, int init)
2500
7.60k
{
2501
7.60k
    int ret;
2502
7.60k
    size_t written;
2503
7.60k
    int writtentot = 0;
2504
7.60k
    size_t toconv;
2505
7.60k
    int c_in;
2506
7.60k
    int c_out;
2507
7.60k
    xmlBufPtr in;
2508
7.60k
    xmlBufPtr out;
2509
2510
7.60k
    if ((output == NULL) || (output->encoder == NULL) ||
2511
7.60k
        (output->buffer == NULL) || (output->conv == NULL))
2512
0
        return (-1);
2513
7.60k
    out = output->conv;
2514
7.60k
    in = output->buffer;
2515
2516
8.14k
retry:
2517
2518
8.14k
    written = xmlBufAvail(out);
2519
2520
    /*
2521
     * First specific handling of the initialization call
2522
     */
2523
8.14k
    if (init) {
2524
1.45k
        c_in = 0;
2525
1.45k
        c_out = written;
2526
        /* TODO: Check return value. */
2527
1.45k
        xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2528
1.45k
                          NULL, &c_in);
2529
1.45k
        xmlBufAddLen(out, c_out);
2530
#ifdef DEBUG_ENCODING
2531
  xmlGenericError(xmlGenericErrorContext,
2532
    "initialized encoder\n");
2533
#endif
2534
1.45k
        return(c_out);
2535
1.45k
    }
2536
2537
    /*
2538
     * Conversion itself.
2539
     */
2540
6.68k
    toconv = xmlBufUse(in);
2541
6.68k
    if (toconv == 0)
2542
1.33k
        return (writtentot);
2543
5.35k
    if (toconv > 64 * 1024)
2544
0
        toconv = 64 * 1024;
2545
5.35k
    if (toconv * 4 >= written) {
2546
1.13k
        xmlBufGrow(out, toconv * 4);
2547
1.13k
        written = xmlBufAvail(out);
2548
1.13k
    }
2549
5.35k
    if (written > 256 * 1024)
2550
30
        written = 256 * 1024;
2551
2552
5.35k
    c_in = toconv;
2553
5.35k
    c_out = written;
2554
5.35k
    ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2555
5.35k
                            xmlBufContent(in), &c_in);
2556
5.35k
    xmlBufShrink(in, c_in);
2557
5.35k
    xmlBufAddLen(out, c_out);
2558
5.35k
    writtentot += c_out;
2559
5.35k
    if (ret == -1) {
2560
0
        if (c_out > 0) {
2561
            /* Can be a limitation of iconv or uconv */
2562
0
            goto retry;
2563
0
        }
2564
0
        ret = -3;
2565
0
    }
2566
2567
    /*
2568
     * Attempt to handle error cases
2569
     */
2570
5.35k
    switch (ret) {
2571
4.56k
        case 0:
2572
#ifdef DEBUG_ENCODING
2573
      xmlGenericError(xmlGenericErrorContext,
2574
        "converted %d bytes to %d bytes of output\n",
2575
              c_in, c_out);
2576
#endif
2577
4.56k
      break;
2578
0
        case -1:
2579
#ifdef DEBUG_ENCODING
2580
      xmlGenericError(xmlGenericErrorContext,
2581
        "output conversion failed by lack of space\n");
2582
#endif
2583
0
      break;
2584
16
        case -3:
2585
#ifdef DEBUG_ENCODING
2586
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2587
              c_in, c_out, (int) xmlBufUse(in));
2588
#endif
2589
16
      break;
2590
0
        case -4:
2591
0
            xmlEncodingErr(XML_I18N_NO_OUTPUT,
2592
0
                           "xmlCharEncOutFunc: no output function !\n", NULL);
2593
0
            ret = -1;
2594
0
            break;
2595
770
        case -2: {
2596
770
      xmlChar charref[20];
2597
770
      int len = xmlBufUse(in);
2598
770
            xmlChar *content = xmlBufContent(in);
2599
770
      int cur, charrefLen;
2600
2601
770
      cur = xmlGetUTF8Char(content, &len);
2602
770
      if (cur <= 0)
2603
232
                break;
2604
2605
#ifdef DEBUG_ENCODING
2606
            xmlGenericError(xmlGenericErrorContext,
2607
                    "handling output conversion error\n");
2608
            xmlGenericError(xmlGenericErrorContext,
2609
                    "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2610
                    content[0], content[1],
2611
                    content[2], content[3]);
2612
#endif
2613
            /*
2614
             * Removes the UTF8 sequence, and replace it by a charref
2615
             * and continue the transcoding phase, hoping the error
2616
             * did not mangle the encoder state.
2617
             */
2618
538
            charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2619
538
                             "&#%d;", cur);
2620
538
            xmlBufShrink(in, len);
2621
538
            xmlBufGrow(out, charrefLen * 4);
2622
538
            c_out = xmlBufAvail(out);
2623
538
            c_in = charrefLen;
2624
538
            ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2625
538
                                    charref, &c_in);
2626
2627
538
      if ((ret < 0) || (c_in != charrefLen)) {
2628
0
    char buf[50];
2629
2630
0
    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2631
0
       content[0], content[1],
2632
0
       content[2], content[3]);
2633
0
    buf[49] = 0;
2634
0
    xmlEncodingErr(XML_I18N_CONV_FAILED,
2635
0
        "output conversion failed due to conv error, bytes %s\n",
2636
0
             buf);
2637
0
    content[0] = ' ';
2638
0
                break;
2639
0
      }
2640
2641
538
            xmlBufAddLen(out, c_out);
2642
538
            writtentot += c_out;
2643
538
            goto retry;
2644
538
  }
2645
5.35k
    }
2646
4.81k
    return(writtentot ? writtentot : ret);
2647
5.35k
}
2648
#endif
2649
2650
/**
2651
 * xmlCharEncOutFunc:
2652
 * @handler:  char encoding transformation data structure
2653
 * @out:  an xmlBuffer for the output.
2654
 * @in:  an xmlBuffer for the input
2655
 *
2656
 * Generic front-end for the encoding handler output function
2657
 * a first call with @in == NULL has to be made firs to initiate the
2658
 * output in case of non-stateless encoding needing to initiate their
2659
 * state or the output (like the BOM in UTF16).
2660
 * In case of UTF8 sequence conversion errors for the given encoder,
2661
 * the content will be automatically remapped to a CharRef sequence.
2662
 *
2663
 * Returns the number of byte written if success, or
2664
 *     -1 general error
2665
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2666
 *        the result of transformation can't fit into the encoding we want), or
2667
 */
2668
int
2669
xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2670
0
                  xmlBufferPtr in) {
2671
0
    int ret;
2672
0
    int written;
2673
0
    int writtentot = 0;
2674
0
    int toconv;
2675
2676
0
    if (handler == NULL) return(-1);
2677
0
    if (out == NULL) return(-1);
2678
2679
0
retry:
2680
2681
0
    written = out->size - out->use;
2682
2683
0
    if (written > 0)
2684
0
  written--; /* Gennady: count '/0' */
2685
2686
    /*
2687
     * First specific handling of in = NULL, i.e. the initialization call
2688
     */
2689
0
    if (in == NULL) {
2690
0
        toconv = 0;
2691
        /* TODO: Check return value. */
2692
0
        xmlEncOutputChunk(handler, &out->content[out->use], &written,
2693
0
                          NULL, &toconv);
2694
0
        out->use += written;
2695
0
        out->content[out->use] = 0;
2696
#ifdef DEBUG_ENCODING
2697
  xmlGenericError(xmlGenericErrorContext,
2698
    "initialized encoder\n");
2699
#endif
2700
0
        return(0);
2701
0
    }
2702
2703
    /*
2704
     * Conversion itself.
2705
     */
2706
0
    toconv = in->use;
2707
0
    if (toconv == 0)
2708
0
  return(0);
2709
0
    if (toconv * 4 >= written) {
2710
0
        xmlBufferGrow(out, toconv * 4);
2711
0
  written = out->size - out->use - 1;
2712
0
    }
2713
0
    ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2714
0
                            in->content, &toconv);
2715
0
    xmlBufferShrink(in, toconv);
2716
0
    out->use += written;
2717
0
    writtentot += written;
2718
0
    out->content[out->use] = 0;
2719
0
    if (ret == -1) {
2720
0
        if (written > 0) {
2721
            /* Can be a limitation of iconv or uconv */
2722
0
            goto retry;
2723
0
        }
2724
0
        ret = -3;
2725
0
    }
2726
2727
    /*
2728
     * Attempt to handle error cases
2729
     */
2730
0
    switch (ret) {
2731
0
        case 0:
2732
#ifdef DEBUG_ENCODING
2733
      xmlGenericError(xmlGenericErrorContext,
2734
        "converted %d bytes to %d bytes of output\n",
2735
              toconv, written);
2736
#endif
2737
0
      break;
2738
0
        case -1:
2739
#ifdef DEBUG_ENCODING
2740
      xmlGenericError(xmlGenericErrorContext,
2741
        "output conversion failed by lack of space\n");
2742
#endif
2743
0
      break;
2744
0
        case -3:
2745
#ifdef DEBUG_ENCODING
2746
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2747
              toconv, written, in->use);
2748
#endif
2749
0
      break;
2750
0
        case -4:
2751
0
      xmlEncodingErr(XML_I18N_NO_OUTPUT,
2752
0
               "xmlCharEncOutFunc: no output function !\n", NULL);
2753
0
      ret = -1;
2754
0
            break;
2755
0
        case -2: {
2756
0
      xmlChar charref[20];
2757
0
      int len = in->use;
2758
0
      const xmlChar *utf = (const xmlChar *) in->content;
2759
0
      int cur, charrefLen;
2760
2761
0
      cur = xmlGetUTF8Char(utf, &len);
2762
0
      if (cur <= 0)
2763
0
                break;
2764
2765
#ifdef DEBUG_ENCODING
2766
            xmlGenericError(xmlGenericErrorContext,
2767
                    "handling output conversion error\n");
2768
            xmlGenericError(xmlGenericErrorContext,
2769
                    "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2770
                    in->content[0], in->content[1],
2771
                    in->content[2], in->content[3]);
2772
#endif
2773
            /*
2774
             * Removes the UTF8 sequence, and replace it by a charref
2775
             * and continue the transcoding phase, hoping the error
2776
             * did not mangle the encoder state.
2777
             */
2778
0
            charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2779
0
                             "&#%d;", cur);
2780
0
            xmlBufferShrink(in, len);
2781
0
            xmlBufferGrow(out, charrefLen * 4);
2782
0
      written = out->size - out->use - 1;
2783
0
            toconv = charrefLen;
2784
0
            ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2785
0
                                    charref, &toconv);
2786
2787
0
      if ((ret < 0) || (toconv != charrefLen)) {
2788
0
    char buf[50];
2789
2790
0
    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2791
0
       in->content[0], in->content[1],
2792
0
       in->content[2], in->content[3]);
2793
0
    buf[49] = 0;
2794
0
    xmlEncodingErr(XML_I18N_CONV_FAILED,
2795
0
        "output conversion failed due to conv error, bytes %s\n",
2796
0
             buf);
2797
0
    in->content[0] = ' ';
2798
0
          break;
2799
0
      }
2800
2801
0
            out->use += written;
2802
0
            writtentot += written;
2803
0
            out->content[out->use] = 0;
2804
0
            goto retry;
2805
0
  }
2806
0
    }
2807
0
    return(writtentot ? writtentot : ret);
2808
0
}
2809
2810
/**
2811
 * xmlCharEncCloseFunc:
2812
 * @handler:  char encoding transformation data structure
2813
 *
2814
 * Generic front-end for encoding handler close function
2815
 *
2816
 * Returns 0 if success, or -1 in case of error
2817
 */
2818
int
2819
7.77k
xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2820
7.77k
    int ret = 0;
2821
7.77k
    int tofree = 0;
2822
7.77k
    int i = 0;
2823
2824
7.77k
    if (handler == NULL) return(-1);
2825
7.77k
    if (handler->name == NULL) return(-1);
2826
2827
40.4k
    for (i = 0; i < (int) NUM_DEFAULT_HANDLERS; i++) {
2828
38.8k
        if (handler == &defaultHandlers[i])
2829
6.21k
            return(0);
2830
38.8k
    }
2831
2832
1.56k
    if (handlers != NULL) {
2833
0
        for (i = 0;i < nbCharEncodingHandler; i++) {
2834
0
            if (handler == handlers[i])
2835
0
                return(0);
2836
0
  }
2837
0
    }
2838
1.56k
#ifdef LIBXML_ICONV_ENABLED
2839
    /*
2840
     * Iconv handlers can be used only once, free the whole block.
2841
     * and the associated icon resources.
2842
     */
2843
1.56k
    if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
2844
1.56k
        tofree = 1;
2845
1.56k
  if (handler->iconv_out != NULL) {
2846
1.56k
      if (iconv_close(handler->iconv_out))
2847
0
    ret = -1;
2848
1.56k
      handler->iconv_out = NULL;
2849
1.56k
  }
2850
1.56k
  if (handler->iconv_in != NULL) {
2851
1.56k
      if (iconv_close(handler->iconv_in))
2852
0
    ret = -1;
2853
1.56k
      handler->iconv_in = NULL;
2854
1.56k
  }
2855
1.56k
    }
2856
1.56k
#endif /* LIBXML_ICONV_ENABLED */
2857
#ifdef LIBXML_ICU_ENABLED
2858
    if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) {
2859
        tofree = 1;
2860
  if (handler->uconv_out != NULL) {
2861
      closeIcuConverter(handler->uconv_out);
2862
      handler->uconv_out = NULL;
2863
  }
2864
  if (handler->uconv_in != NULL) {
2865
      closeIcuConverter(handler->uconv_in);
2866
      handler->uconv_in = NULL;
2867
  }
2868
    }
2869
#endif
2870
1.56k
    if (tofree) {
2871
        /* free up only dynamic handlers iconv/uconv */
2872
1.56k
        if (handler->name != NULL)
2873
1.56k
            xmlFree(handler->name);
2874
1.56k
        handler->name = NULL;
2875
1.56k
        xmlFree(handler);
2876
1.56k
    }
2877
#ifdef DEBUG_ENCODING
2878
    if (ret)
2879
        xmlGenericError(xmlGenericErrorContext,
2880
    "failed to close the encoding handler\n");
2881
    else
2882
        xmlGenericError(xmlGenericErrorContext,
2883
    "closed the encoding handler\n");
2884
#endif
2885
2886
1.56k
    return(ret);
2887
1.56k
}
2888
2889
/**
2890
 * xmlByteConsumed:
2891
 * @ctxt: an XML parser context
2892
 *
2893
 * This function provides the current index of the parser relative
2894
 * to the start of the current entity. This function is computed in
2895
 * bytes from the beginning starting at zero and finishing at the
2896
 * size in byte of the file if parsing a file. The function is
2897
 * of constant cost if the input is UTF-8 but can be costly if run
2898
 * on non-UTF-8 input.
2899
 *
2900
 * Returns the index in bytes from the beginning of the entity or -1
2901
 *         in case the index could not be computed.
2902
 */
2903
long
2904
0
xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2905
0
    xmlParserInputPtr in;
2906
2907
0
    if (ctxt == NULL) return(-1);
2908
0
    in = ctxt->input;
2909
0
    if (in == NULL)  return(-1);
2910
0
    if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2911
0
        unsigned int unused = 0;
2912
0
  xmlCharEncodingHandler * handler = in->buf->encoder;
2913
        /*
2914
   * Encoding conversion, compute the number of unused original
2915
   * bytes from the input not consumed and subtract that from
2916
   * the raw consumed value, this is not a cheap operation
2917
   */
2918
0
        if (in->end - in->cur > 0) {
2919
0
      unsigned char convbuf[32000];
2920
0
      const unsigned char *cur = (const unsigned char *)in->cur;
2921
0
      int toconv = in->end - in->cur, written = 32000;
2922
2923
0
      int ret;
2924
2925
0
            do {
2926
0
                toconv = in->end - cur;
2927
0
                written = 32000;
2928
0
                ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2929
0
                                        cur, &toconv);
2930
0
                if (ret < 0) {
2931
0
                    if (written > 0)
2932
0
                        ret = -2;
2933
0
                    else
2934
0
                        return(-1);
2935
0
                }
2936
0
                unused += written;
2937
0
                cur += toconv;
2938
0
            } while (ret == -2);
2939
0
  }
2940
0
  if (in->buf->rawconsumed < unused)
2941
0
      return(-1);
2942
0
  return(in->buf->rawconsumed - unused);
2943
0
    }
2944
0
    return(in->consumed + (in->cur - in->base));
2945
0
}
2946
2947
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2948
#ifdef LIBXML_ISO8859X_ENABLED
2949
2950
/**
2951
 * UTF8ToISO8859x:
2952
 * @out:  a pointer to an array of bytes to store the result
2953
 * @outlen:  the length of @out
2954
 * @in:  a pointer to an array of UTF-8 chars
2955
 * @inlen:  the length of @in
2956
 * @xlattable: the 2-level transcoding table
2957
 *
2958
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
2959
 * block of chars out.
2960
 *
2961
 * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
2962
 * The value of @inlen after return is the number of octets consumed
2963
 *     as the return value is positive, else unpredictable.
2964
 * The value of @outlen after return is the number of octets consumed.
2965
 */
2966
static int
2967
UTF8ToISO8859x(unsigned char* out, int *outlen,
2968
              const unsigned char* in, int *inlen,
2969
              const unsigned char* const xlattable) {
2970
    const unsigned char* outstart = out;
2971
    const unsigned char* inend;
2972
    const unsigned char* instart = in;
2973
    const unsigned char* processed = in;
2974
2975
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
2976
        (xlattable == NULL))
2977
  return(-1);
2978
    if (in == NULL) {
2979
        /*
2980
        * initialization nothing to do
2981
        */
2982
        *outlen = 0;
2983
        *inlen = 0;
2984
        return(0);
2985
    }
2986
    inend = in + (*inlen);
2987
    while (in < inend) {
2988
        unsigned char d = *in++;
2989
        if  (d < 0x80)  {
2990
            *out++ = d;
2991
        } else if (d < 0xC0) {
2992
            /* trailing byte in leading position */
2993
            *outlen = out - outstart;
2994
            *inlen = processed - instart;
2995
            return(-2);
2996
        } else if (d < 0xE0) {
2997
            unsigned char c;
2998
            if (!(in < inend)) {
2999
                /* trailing byte not in input buffer */
3000
                *outlen = out - outstart;
3001
                *inlen = processed - instart;
3002
                return(-3);
3003
            }
3004
            c = *in++;
3005
            if ((c & 0xC0) != 0x80) {
3006
                /* not a trailing byte */
3007
                *outlen = out - outstart;
3008
                *inlen = processed - instart;
3009
                return(-2);
3010
            }
3011
            c = c & 0x3F;
3012
            d = d & 0x1F;
3013
            d = xlattable [48 + c + xlattable [d] * 64];
3014
            if (d == 0) {
3015
                /* not in character set */
3016
                *outlen = out - outstart;
3017
                *inlen = processed - instart;
3018
                return(-2);
3019
            }
3020
            *out++ = d;
3021
        } else if (d < 0xF0) {
3022
            unsigned char c1;
3023
            unsigned char c2;
3024
            if (!(in < inend - 1)) {
3025
                /* trailing bytes not in input buffer */
3026
                *outlen = out - outstart;
3027
                *inlen = processed - instart;
3028
                return(-3);
3029
            }
3030
            c1 = *in++;
3031
            if ((c1 & 0xC0) != 0x80) {
3032
                /* not a trailing byte (c1) */
3033
                *outlen = out - outstart;
3034
                *inlen = processed - instart;
3035
                return(-2);
3036
            }
3037
            c2 = *in++;
3038
            if ((c2 & 0xC0) != 0x80) {
3039
                /* not a trailing byte (c2) */
3040
                *outlen = out - outstart;
3041
                *inlen = processed - instart;
3042
                return(-2);
3043
            }
3044
            c1 = c1 & 0x3F;
3045
            c2 = c2 & 0x3F;
3046
      d = d & 0x0F;
3047
      d = xlattable [48 + c2 + xlattable [48 + c1 +
3048
      xlattable [32 + d] * 64] * 64];
3049
            if (d == 0) {
3050
                /* not in character set */
3051
                *outlen = out - outstart;
3052
                *inlen = processed - instart;
3053
                return(-2);
3054
            }
3055
            *out++ = d;
3056
        } else {
3057
            /* cannot transcode >= U+010000 */
3058
            *outlen = out - outstart;
3059
            *inlen = processed - instart;
3060
            return(-2);
3061
        }
3062
        processed = in;
3063
    }
3064
    *outlen = out - outstart;
3065
    *inlen = processed - instart;
3066
    return(*outlen);
3067
}
3068
3069
/**
3070
 * ISO8859xToUTF8
3071
 * @out:  a pointer to an array of bytes to store the result
3072
 * @outlen:  the length of @out
3073
 * @in:  a pointer to an array of ISO Latin 1 chars
3074
 * @inlen:  the length of @in
3075
 *
3076
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
3077
 * block of chars out.
3078
 * Returns 0 if success, or -1 otherwise
3079
 * The value of @inlen after return is the number of octets consumed
3080
 * The value of @outlen after return is the number of octets produced.
3081
 */
3082
static int
3083
ISO8859xToUTF8(unsigned char* out, int *outlen,
3084
              const unsigned char* in, int *inlen,
3085
              unsigned short const *unicodetable) {
3086
    unsigned char* outstart = out;
3087
    unsigned char* outend;
3088
    const unsigned char* instart = in;
3089
    const unsigned char* inend;
3090
    const unsigned char* instop;
3091
    unsigned int c;
3092
3093
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
3094
        (in == NULL) || (unicodetable == NULL))
3095
  return(-1);
3096
    outend = out + *outlen;
3097
    inend = in + *inlen;
3098
    instop = inend;
3099
3100
    while ((in < inend) && (out < outend - 2)) {
3101
        if (*in >= 0x80) {
3102
            c = unicodetable [*in - 0x80];
3103
            if (c == 0) {
3104
                /* undefined code point */
3105
                *outlen = out - outstart;
3106
                *inlen = in - instart;
3107
                return (-1);
3108
            }
3109
            if (c < 0x800) {
3110
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
3111
                *out++ = (c & 0x3F) | 0x80;
3112
            } else {
3113
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
3114
                *out++ = ((c >>  6) & 0x3F) | 0x80;
3115
                *out++ = (c & 0x3F) | 0x80;
3116
            }
3117
            ++in;
3118
        }
3119
        if (instop - in > outend - out) instop = in + (outend - out);
3120
        while ((*in < 0x80) && (in < instop)) {
3121
            *out++ = *in++;
3122
        }
3123
    }
3124
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
3125
        *out++ =  *in++;
3126
    }
3127
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
3128
        *out++ =  *in++;
3129
    }
3130
    *outlen = out - outstart;
3131
    *inlen = in - instart;
3132
    return (*outlen);
3133
}
3134
3135
3136
/************************************************************************
3137
 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
3138
 ************************************************************************/
3139
3140
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
3141
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3142
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3143
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3144
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3145
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
3146
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
3147
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
3148
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
3149
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
3150
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
3151
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
3152
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
3153
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
3154
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
3155
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
3156
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
3157
};
3158
3159
static const unsigned char xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
3160
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3161
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3162
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3163
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3164
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3165
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3166
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3167
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3168
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3169
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3170
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3171
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
3172
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
3173
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3174
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
3175
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3176
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
3177
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3178
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3179
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
3180
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
3181
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
3182
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
3183
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3184
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
3185
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
3186
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
3187
};
3188
3189
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
3190
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3191
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3192
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3193
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3194
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
3195
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
3196
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
3197
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
3198
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
3199
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3200
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
3201
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
3202
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
3203
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3204
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
3205
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
3206
};
3207
3208
static const unsigned char xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
3209
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3210
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3211
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3212
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3213
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3214
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3215
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3216
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3217
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3218
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3219
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
3220
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
3221
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
3222
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
3223
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3224
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3225
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
3226
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3227
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3228
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3229
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3230
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3231
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3232
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3233
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
3234
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
3235
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
3236
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3237
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3238
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3239
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
3240
};
3241
3242
static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
3243
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3244
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3245
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3246
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3247
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
3248
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
3249
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
3250
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
3251
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3252
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
3253
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3254
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
3255
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3256
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
3257
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3258
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
3259
};
3260
3261
static const unsigned char xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
3262
    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
3263
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3264
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3265
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3266
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3267
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3268
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3269
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3270
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3271
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
3272
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3273
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3274
    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3275
    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
3276
    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
3277
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
3278
    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
3279
    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
3280
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
3281
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3282
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
3283
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3284
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3285
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3286
    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
3287
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
3288
    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
3289
};
3290
3291
static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
3292
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3293
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3294
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3295
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3296
    0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
3297
    0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
3298
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
3299
    0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
3300
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
3301
    0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
3302
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
3303
    0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
3304
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
3305
    0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
3306
    0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
3307
    0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
3308
};
3309
3310
static const unsigned char xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
3311
    "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3312
    "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3313
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3314
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3315
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3316
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3317
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3318
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3319
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3320
    "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
3321
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3322
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
3323
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3324
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3325
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3326
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3327
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
3328
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3329
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3330
    "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3331
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3332
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3333
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3334
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3335
    "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3336
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3337
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3338
};
3339
3340
static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
3341
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3342
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3343
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3344
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3345
    0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
3346
    0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
3347
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3348
    0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
3349
    0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
3350
    0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3351
    0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3352
    0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3353
    0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3354
    0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3355
    0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3356
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3357
};
3358
3359
static const unsigned char xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
3360
    "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3361
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3362
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3363
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3364
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3365
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3366
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3367
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3368
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3369
    "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3370
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3371
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3372
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3373
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3374
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3375
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3376
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3377
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3378
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3379
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3380
    "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3381
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3382
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3383
};
3384
3385
static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
3386
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3387
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3388
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3389
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3390
    0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3391
    0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3392
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3393
    0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3394
    0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3395
    0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3396
    0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3397
    0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3398
    0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3399
    0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3400
    0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3401
    0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3402
};
3403
3404
static const unsigned char xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3405
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3406
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3407
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3408
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3409
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3410
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3411
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3412
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3413
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3414
    "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3415
    "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3416
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3417
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3418
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3419
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3420
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3421
    "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3422
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3423
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3424
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3425
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3426
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3427
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3428
    "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3429
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3430
    "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3431
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3432
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3433
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3434
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3435
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3436
};
3437
3438
static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
3439
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3440
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3441
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3442
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3443
    0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3444
    0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3445
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3446
    0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3447
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3448
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3449
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3450
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3451
    0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3452
    0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3453
    0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3454
    0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3455
};
3456
3457
static const unsigned char xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3458
    "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3459
    "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3460
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3461
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3462
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3463
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3464
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3465
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3466
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3467
    "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3468
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3469
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3470
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3471
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3472
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3473
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3474
    "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3475
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3476
    "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3477
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3478
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3479
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3480
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3481
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3482
    "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3483
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3484
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3485
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3486
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3487
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3488
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3489
};
3490
3491
static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
3492
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3493
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3494
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3495
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3496
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3497
    0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3498
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3499
    0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3500
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3501
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3502
    0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3503
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3504
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3505
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3506
    0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3507
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3508
};
3509
3510
static const unsigned char xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3511
    "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3512
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3513
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3514
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3515
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3516
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3517
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3518
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3519
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3520
    "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3521
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3522
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3523
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3524
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3525
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3526
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3527
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3528
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3529
    "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3530
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3531
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3532
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3533
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3534
};
3535
3536
static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3537
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3538
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3539
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3540
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3541
    0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3542
    0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3543
    0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3544
    0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3545
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3546
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3547
    0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3548
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3549
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3550
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3551
    0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3552
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3553
};
3554
3555
static const unsigned char xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3556
    "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3557
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3558
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3559
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3560
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3561
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3562
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3563
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3564
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3565
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3566
    "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3567
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3568
    "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3569
    "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3570
    "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3571
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3572
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3573
    "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3574
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3575
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3576
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3577
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3578
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3579
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3580
    "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3581
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3582
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3583
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3584
    "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3585
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3586
    "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3587
};
3588
3589
static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3590
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3591
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3592
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3593
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3594
    0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3595
    0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3596
    0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3597
    0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3598
    0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3599
    0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3600
    0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3601
    0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3602
    0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3603
    0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3604
    0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3605
    0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3606
};
3607
3608
static const unsigned char xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3609
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3610
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3611
    "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3612
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3613
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3614
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3615
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3616
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3617
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3618
    "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3619
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3620
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3621
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3622
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3623
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3624
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3625
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3626
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3627
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3628
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3629
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3630
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3631
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3632
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3633
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3634
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3635
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3636
};
3637
3638
static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3639
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3640
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3641
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3642
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3643
    0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3644
    0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3645
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3646
    0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3647
    0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3648
    0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3649
    0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3650
    0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3651
    0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3652
    0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3653
    0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3654
    0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3655
};
3656
3657
static const unsigned char xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3658
    "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3659
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3660
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3661
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3662
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3663
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3664
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3665
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3666
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3667
    "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3668
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3669
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3670
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3671
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3672
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3673
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3674
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3675
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3676
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3677
    "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3678
    "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3679
    "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3680
    "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3681
    "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3682
    "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3683
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3684
    "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3685
    "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3686
    "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3687
    "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3688
    "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3689
};
3690
3691
static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
3692
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3693
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3694
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3695
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3696
    0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3697
    0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3698
    0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3699
    0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3700
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3701
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3702
    0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3703
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3704
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3705
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3706
    0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3707
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
3708
};
3709
3710
static const unsigned char xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
3711
    "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3712
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3713
    "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3714
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3715
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3716
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3717
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3718
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3719
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3720
    "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3721
    "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3722
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3723
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3724
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3725
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3726
    "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3727
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3728
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3729
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3730
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3731
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3732
    "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3733
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3734
    "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3735
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3736
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3737
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3738
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3739
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3740
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3741
    "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3742
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3743
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3744
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3745
    "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3746
    "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3747
    "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3748
    "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3749
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3750
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3751
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3752
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3753
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3754
};
3755
3756
static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
3757
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3758
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3759
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3760
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3761
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3762
    0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3763
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3764
    0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3765
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3766
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3767
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3768
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3769
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3770
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3771
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3772
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
3773
};
3774
3775
static const unsigned char xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
3776
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3777
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3778
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3779
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3780
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3781
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3782
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3783
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3784
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3785
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3786
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3787
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3788
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3789
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3790
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3791
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3792
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3793
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3794
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3795
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3796
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3797
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3798
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3799
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3800
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3801
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3802
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
3803
};
3804
3805
static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
3806
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3807
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3808
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3809
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3810
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3811
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3812
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3813
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3814
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3815
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3816
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3817
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3818
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3819
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3820
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3821
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
3822
};
3823
3824
static const unsigned char xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
3825
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3826
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3827
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3828
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3829
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3830
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3831
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3832
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3833
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3834
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3835
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3836
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3837
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3838
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3839
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3840
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3841
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3842
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3843
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3844
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3845
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3846
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3847
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3848
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3849
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3850
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3851
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3852
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3853
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3854
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3855
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3856
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3857
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3858
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3859
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3860
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3861
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3862
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3863
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3864
};
3865
3866
3867
/*
3868
 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3869
 */
3870
3871
static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3872
    const unsigned char* in, int *inlen) {
3873
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3874
}
3875
static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3876
    const unsigned char* in, int *inlen) {
3877
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3878
}
3879
3880
static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3881
    const unsigned char* in, int *inlen) {
3882
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3883
}
3884
static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3885
    const unsigned char* in, int *inlen) {
3886
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3887
}
3888
3889
static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3890
    const unsigned char* in, int *inlen) {
3891
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3892
}
3893
static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3894
    const unsigned char* in, int *inlen) {
3895
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3896
}
3897
3898
static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3899
    const unsigned char* in, int *inlen) {
3900
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3901
}
3902
static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3903
    const unsigned char* in, int *inlen) {
3904
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3905
}
3906
3907
static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3908
    const unsigned char* in, int *inlen) {
3909
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3910
}
3911
static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3912
    const unsigned char* in, int *inlen) {
3913
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3914
}
3915
3916
static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3917
    const unsigned char* in, int *inlen) {
3918
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3919
}
3920
static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3921
    const unsigned char* in, int *inlen) {
3922
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3923
}
3924
3925
static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3926
    const unsigned char* in, int *inlen) {
3927
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3928
}
3929
static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3930
    const unsigned char* in, int *inlen) {
3931
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3932
}
3933
3934
static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3935
    const unsigned char* in, int *inlen) {
3936
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3937
}
3938
static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3939
    const unsigned char* in, int *inlen) {
3940
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3941
}
3942
3943
static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3944
    const unsigned char* in, int *inlen) {
3945
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3946
}
3947
static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3948
    const unsigned char* in, int *inlen) {
3949
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3950
}
3951
3952
static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3953
    const unsigned char* in, int *inlen) {
3954
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3955
}
3956
static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3957
    const unsigned char* in, int *inlen) {
3958
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3959
}
3960
3961
static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3962
    const unsigned char* in, int *inlen) {
3963
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3964
}
3965
static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3966
    const unsigned char* in, int *inlen) {
3967
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3968
}
3969
3970
static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3971
    const unsigned char* in, int *inlen) {
3972
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3973
}
3974
static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3975
    const unsigned char* in, int *inlen) {
3976
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3977
}
3978
3979
static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3980
    const unsigned char* in, int *inlen) {
3981
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3982
}
3983
static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3984
    const unsigned char* in, int *inlen) {
3985
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3986
}
3987
3988
static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3989
    const unsigned char* in, int *inlen) {
3990
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3991
}
3992
static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3993
    const unsigned char* in, int *inlen) {
3994
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3995
}
3996
3997
#endif
3998
#endif
3999