Coverage Report

Created: 2023-09-28 22:19

/src/libxml2/encoding.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * encoding.c : implements the encoding conversion functions needed for XML
3
 *
4
 * Related specs:
5
 * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6
 * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7
 * [ISO-10646]    UTF-8 and UTF-16 in Annexes
8
 * [ISO-8859-1]   ISO Latin-1 characters codes.
9
 * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
10
 *                Worldwide Character Encoding -- Version 1.0", Addison-
11
 *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
12
 *                described in Unicode Technical Report #4.
13
 * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
14
 *                Information Interchange, ANSI X3.4-1986.
15
 *
16
 * See Copyright for the status of this software.
17
 *
18
 * daniel@veillard.com
19
 *
20
 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
21
 */
22
23
#define IN_LIBXML
24
#include "libxml.h"
25
26
#include <string.h>
27
#include <limits.h>
28
#include <ctype.h>
29
#include <stdlib.h>
30
31
#ifdef LIBXML_ICONV_ENABLED
32
#include <errno.h>
33
#endif
34
35
#include <libxml/encoding.h>
36
#include <libxml/xmlmemory.h>
37
#ifdef LIBXML_HTML_ENABLED
38
#include <libxml/HTMLparser.h>
39
#endif
40
#include <libxml/globals.h>
41
#include <libxml/xmlerror.h>
42
43
#include "private/buf.h"
44
#include "private/enc.h"
45
#include "private/error.h"
46
47
#ifdef LIBXML_ICU_ENABLED
48
#include <unicode/ucnv.h>
49
/* Size of pivot buffer, same as icu/source/common/ucnv.cpp CHUNK_SIZE */
50
#define ICU_PIVOT_BUF_SIZE 1024
51
typedef struct _uconv_t uconv_t;
52
struct _uconv_t {
53
  UConverter *uconv; /* for conversion between an encoding and UTF-16 */
54
  UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */
55
  UChar      pivot_buf[ICU_PIVOT_BUF_SIZE];
56
  UChar      *pivot_source;
57
  UChar      *pivot_target;
58
};
59
#endif
60
61
typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
62
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
63
struct _xmlCharEncodingAlias {
64
    const char *name;
65
    const char *alias;
66
};
67
68
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
69
static int xmlCharEncodingAliasesNb = 0;
70
static int xmlCharEncodingAliasesMax = 0;
71
72
#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
73
#if 0
74
#define DEBUG_ENCODING  /* Define this to get encoding traces */
75
#endif
76
#else
77
#endif
78
79
static int xmlLittleEndian = 1;
80
81
/**
82
 * xmlEncodingErrMemory:
83
 * @extra:  extra information
84
 *
85
 * Handle an out of memory condition
86
 */
87
static void
88
xmlEncodingErrMemory(const char *extra)
89
0
{
90
0
    __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
91
0
}
92
93
/**
94
 * xmlErrEncoding:
95
 * @error:  the error number
96
 * @msg:  the error message
97
 *
98
 * n encoding error
99
 */
100
static void LIBXML_ATTR_FORMAT(2,0)
101
xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
102
9.17k
{
103
9.17k
    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
104
9.17k
                    XML_FROM_I18N, error, XML_ERR_FATAL,
105
9.17k
                    NULL, 0, val, NULL, NULL, 0, 0, msg, val);
106
9.17k
}
107
108
#ifdef LIBXML_ICU_ENABLED
109
static uconv_t*
110
openIcuConverter(const char* name, int toUnicode)
111
{
112
  UErrorCode status = U_ZERO_ERROR;
113
  uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
114
  if (conv == NULL)
115
    return NULL;
116
117
  conv->pivot_source = conv->pivot_buf;
118
  conv->pivot_target = conv->pivot_buf;
119
120
  conv->uconv = ucnv_open(name, &status);
121
  if (U_FAILURE(status))
122
    goto error;
123
124
  status = U_ZERO_ERROR;
125
  if (toUnicode) {
126
    ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
127
                        NULL, NULL, NULL, &status);
128
  }
129
  else {
130
    ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
131
                        NULL, NULL, NULL, &status);
132
  }
133
  if (U_FAILURE(status))
134
    goto error;
135
136
  status = U_ZERO_ERROR;
137
  conv->utf8 = ucnv_open("UTF-8", &status);
138
  if (U_SUCCESS(status))
139
    return conv;
140
141
error:
142
  if (conv->uconv)
143
    ucnv_close(conv->uconv);
144
  xmlFree(conv);
145
  return NULL;
146
}
147
148
static void
149
closeIcuConverter(uconv_t *conv)
150
{
151
  if (conv != NULL) {
152
    ucnv_close(conv->uconv);
153
    ucnv_close(conv->utf8);
154
    xmlFree(conv);
155
  }
156
}
157
#endif /* LIBXML_ICU_ENABLED */
158
159
/************************************************************************
160
 *                  *
161
 *    Conversions To/From UTF8 encoding     *
162
 *                  *
163
 ************************************************************************/
164
165
/**
166
 * asciiToUTF8:
167
 * @out:  a pointer to an array of bytes to store the result
168
 * @outlen:  the length of @out
169
 * @in:  a pointer to an array of ASCII chars
170
 * @inlen:  the length of @in
171
 *
172
 * Take a block of ASCII chars in and try to convert it to an UTF-8
173
 * block of chars out.
174
 * Returns 0 if success, or -1 otherwise
175
 * The value of @inlen after return is the number of octets consumed
176
 *     if the return value is positive, else unpredictable.
177
 * The value of @outlen after return is the number of octets produced.
178
 */
179
static int
180
asciiToUTF8(unsigned char* out, int *outlen,
181
320k
              const unsigned char* in, int *inlen) {
182
320k
    unsigned char* outstart = out;
183
320k
    const unsigned char* base = in;
184
320k
    const unsigned char* processed = in;
185
320k
    unsigned char* outend = out + *outlen;
186
320k
    const unsigned char* inend;
187
320k
    unsigned int c;
188
189
320k
    inend = in + (*inlen);
190
5.49M
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
191
5.47M
  c= *in++;
192
193
5.47M
        if (out >= outend)
194
0
      break;
195
5.47M
        if (c < 0x80) {
196
5.16M
      *out++ = c;
197
5.16M
  } else {
198
302k
      *outlen = out - outstart;
199
302k
      *inlen = processed - base;
200
302k
      return(-1);
201
302k
  }
202
203
5.16M
  processed = (const unsigned char*) in;
204
5.16M
    }
205
18.4k
    *outlen = out - outstart;
206
18.4k
    *inlen = processed - base;
207
18.4k
    return(*outlen);
208
320k
}
209
210
#ifdef LIBXML_OUTPUT_ENABLED
211
/**
212
 * UTF8Toascii:
213
 * @out:  a pointer to an array of bytes to store the result
214
 * @outlen:  the length of @out
215
 * @in:  a pointer to an array of UTF-8 chars
216
 * @inlen:  the length of @in
217
 *
218
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
219
 * block of chars out.
220
 *
221
 * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
222
 * The value of @inlen after return is the number of octets consumed
223
 *     if the return value is positive, else unpredictable.
224
 * The value of @outlen after return is the number of octets produced.
225
 */
226
static int
227
UTF8Toascii(unsigned char* out, int *outlen,
228
11.3k
              const unsigned char* in, int *inlen) {
229
11.3k
    const unsigned char* processed = in;
230
11.3k
    const unsigned char* outend;
231
11.3k
    const unsigned char* outstart = out;
232
11.3k
    const unsigned char* instart = in;
233
11.3k
    const unsigned char* inend;
234
11.3k
    unsigned int c, d;
235
11.3k
    int trailing;
236
237
11.3k
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
238
11.3k
    if (in == NULL) {
239
        /*
240
   * initialization nothing to do
241
   */
242
1.13k
  *outlen = 0;
243
1.13k
  *inlen = 0;
244
1.13k
  return(0);
245
1.13k
    }
246
10.2k
    inend = in + (*inlen);
247
10.2k
    outend = out + (*outlen);
248
905k
    while (in < inend) {
249
900k
  d = *in++;
250
900k
  if      (d < 0x80)  { c= d; trailing= 0; }
251
4.49k
  else if (d < 0xC0) {
252
      /* trailing byte in leading position */
253
2
      *outlen = out - outstart;
254
2
      *inlen = processed - instart;
255
2
      return(-2);
256
4.48k
        } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
257
869
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
258
377
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
259
0
  else {
260
      /* no chance for this in Ascii */
261
0
      *outlen = out - outstart;
262
0
      *inlen = processed - instart;
263
0
      return(-2);
264
0
  }
265
266
900k
  if (inend - in < trailing) {
267
1
      break;
268
1
  }
269
270
905k
  for ( ; trailing; trailing--) {
271
5.73k
      if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
272
0
    break;
273
5.73k
      c <<= 6;
274
5.73k
      c |= d & 0x3F;
275
5.73k
  }
276
277
  /* assertion: c is a single UTF-4 value */
278
900k
  if (c < 0x80) {
279
895k
      if (out >= outend)
280
0
    break;
281
895k
      *out++ = c;
282
895k
  } else {
283
      /* no chance for this in Ascii */
284
4.48k
      *outlen = out - outstart;
285
4.48k
      *inlen = processed - instart;
286
4.48k
      return(-2);
287
4.48k
  }
288
895k
  processed = in;
289
895k
    }
290
5.72k
    *outlen = out - outstart;
291
5.72k
    *inlen = processed - instart;
292
5.72k
    return(*outlen);
293
10.2k
}
294
#endif /* LIBXML_OUTPUT_ENABLED */
295
296
/**
297
 * isolat1ToUTF8:
298
 * @out:  a pointer to an array of bytes to store the result
299
 * @outlen:  the length of @out
300
 * @in:  a pointer to an array of ISO Latin 1 chars
301
 * @inlen:  the length of @in
302
 *
303
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
304
 * block of chars out.
305
 * Returns the number of bytes written if success, or -1 otherwise
306
 * The value of @inlen after return is the number of octets consumed
307
 *     if the return value is positive, else unpredictable.
308
 * The value of @outlen after return is the number of octets produced.
309
 */
310
int
311
isolat1ToUTF8(unsigned char* out, int *outlen,
312
891k
              const unsigned char* in, int *inlen) {
313
891k
    unsigned char* outstart = out;
314
891k
    const unsigned char* base = in;
315
891k
    unsigned char* outend;
316
891k
    const unsigned char* inend;
317
891k
    const unsigned char* instop;
318
319
891k
    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
320
0
  return(-1);
321
322
891k
    outend = out + *outlen;
323
891k
    inend = in + (*inlen);
324
891k
    instop = inend;
325
326
2.81M
    while ((in < inend) && (out < outend - 1)) {
327
1.92M
  if (*in >= 0x80) {
328
1.03M
      *out++ = (((*in) >>  6) & 0x1F) | 0xC0;
329
1.03M
            *out++ = ((*in) & 0x3F) | 0x80;
330
1.03M
      ++in;
331
1.03M
  }
332
1.92M
  if ((instop - in) > (outend - out)) instop = in + (outend - out);
333
299M
  while ((in < instop) && (*in < 0x80)) {
334
297M
      *out++ = *in++;
335
297M
  }
336
1.92M
    }
337
891k
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
338
0
        *out++ = *in++;
339
0
    }
340
891k
    *outlen = out - outstart;
341
891k
    *inlen = in - base;
342
891k
    return(*outlen);
343
891k
}
344
345
/**
346
 * UTF8ToUTF8:
347
 * @out:  a pointer to an array of bytes to store the result
348
 * @outlen:  the length of @out
349
 * @inb:  a pointer to an array of UTF-8 chars
350
 * @inlenb:  the length of @in in UTF-8 chars
351
 *
352
 * No op copy operation for UTF8 handling.
353
 *
354
 * Returns the number of bytes written, or -1 if lack of space.
355
 *     The value of *inlen after return is the number of octets consumed
356
 *     if the return value is positive, else unpredictable.
357
 */
358
static int
359
UTF8ToUTF8(unsigned char* out, int *outlen,
360
           const unsigned char* inb, int *inlenb)
361
14.9k
{
362
14.9k
    int len;
363
364
14.9k
    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
365
0
  return(-1);
366
14.9k
    if (inb == NULL) {
367
        /* inb == NULL means output is initialized. */
368
3.64k
        *outlen = 0;
369
3.64k
        *inlenb = 0;
370
3.64k
        return(0);
371
3.64k
    }
372
11.3k
    if (*outlen > *inlenb) {
373
11.3k
  len = *inlenb;
374
11.3k
    } else {
375
0
  len = *outlen;
376
0
    }
377
11.3k
    if (len < 0)
378
0
  return(-1);
379
380
    /*
381
     * FIXME: Conversion functions must assure valid UTF-8, so we have
382
     * to check for UTF-8 validity. Preferably, this converter shouldn't
383
     * be used at all.
384
     */
385
11.3k
    memcpy(out, inb, len);
386
387
11.3k
    *outlen = len;
388
11.3k
    *inlenb = len;
389
11.3k
    return(*outlen);
390
11.3k
}
391
392
393
#ifdef LIBXML_OUTPUT_ENABLED
394
/**
395
 * UTF8Toisolat1:
396
 * @out:  a pointer to an array of bytes to store the result
397
 * @outlen:  the length of @out
398
 * @in:  a pointer to an array of UTF-8 chars
399
 * @inlen:  the length of @in
400
 *
401
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
402
 * block of chars out.
403
 *
404
 * Returns the number of bytes written if success, -2 if the transcoding fails,
405
           or -1 otherwise
406
 * The value of @inlen after return is the number of octets consumed
407
 *     if the return value is positive, else unpredictable.
408
 * The value of @outlen after return is the number of octets produced.
409
 */
410
int
411
UTF8Toisolat1(unsigned char* out, int *outlen,
412
117k
              const unsigned char* in, int *inlen) {
413
117k
    const unsigned char* processed = in;
414
117k
    const unsigned char* outend;
415
117k
    const unsigned char* outstart = out;
416
117k
    const unsigned char* instart = in;
417
117k
    const unsigned char* inend;
418
117k
    unsigned int c, d;
419
117k
    int trailing;
420
421
117k
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
422
117k
    if (in == NULL) {
423
        /*
424
   * initialization nothing to do
425
   */
426
3.17k
  *outlen = 0;
427
3.17k
  *inlen = 0;
428
3.17k
  return(0);
429
3.17k
    }
430
114k
    inend = in + (*inlen);
431
114k
    outend = out + (*outlen);
432
59.1M
    while (in < inend) {
433
59.0M
  d = *in++;
434
59.0M
  if      (d < 0x80)  { c= d; trailing= 0; }
435
188k
  else if (d < 0xC0) {
436
      /* trailing byte in leading position */
437
1.01k
      *outlen = out - outstart;
438
1.01k
      *inlen = processed - instart;
439
1.01k
      return(-2);
440
187k
        } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
441
48.1k
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
442
1.04k
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
443
92
  else {
444
      /* no chance for this in IsoLat1 */
445
92
      *outlen = out - outstart;
446
92
      *inlen = processed - instart;
447
92
      return(-2);
448
92
  }
449
450
59.0M
  if (inend - in < trailing) {
451
57
      break;
452
57
  }
453
454
59.2M
  for ( ; trailing; trailing--) {
455
236k
      if (in >= inend)
456
0
    break;
457
236k
      if (((d= *in++) & 0xC0) != 0x80) {
458
260
    *outlen = out - outstart;
459
260
    *inlen = processed - instart;
460
260
    return(-2);
461
260
      }
462
236k
      c <<= 6;
463
236k
      c |= d & 0x3F;
464
236k
  }
465
466
  /* assertion: c is a single UTF-4 value */
467
59.0M
  if (c <= 0xFF) {
468
59.0M
      if (out >= outend)
469
0
    break;
470
59.0M
      *out++ = c;
471
59.0M
  } else {
472
      /* no chance for this in IsoLat1 */
473
48.6k
      *outlen = out - outstart;
474
48.6k
      *inlen = processed - instart;
475
48.6k
      return(-2);
476
48.6k
  }
477
59.0M
  processed = in;
478
59.0M
    }
479
64.5k
    *outlen = out - outstart;
480
64.5k
    *inlen = processed - instart;
481
64.5k
    return(*outlen);
482
114k
}
483
#endif /* LIBXML_OUTPUT_ENABLED */
484
485
/**
486
 * UTF16LEToUTF8:
487
 * @out:  a pointer to an array of bytes to store the result
488
 * @outlen:  the length of @out
489
 * @inb:  a pointer to an array of UTF-16LE passwd as a byte array
490
 * @inlenb:  the length of @in in UTF-16LE chars
491
 *
492
 * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8
493
 * block of chars out. This function assumes the endian property
494
 * is the same between the native type of this machine and the
495
 * inputed one.
496
 *
497
 * Returns the number of bytes written, or -1 if lack of space, or -2
498
 *     if the transcoding fails (if *in is not a valid utf16 string)
499
 *     The value of *inlen after return is the number of octets consumed
500
 *     if the return value is positive, else unpredictable.
501
 */
502
static int
503
UTF16LEToUTF8(unsigned char* out, int *outlen,
504
            const unsigned char* inb, int *inlenb)
505
66.1k
{
506
66.1k
    unsigned char* outstart = out;
507
66.1k
    const unsigned char* processed = inb;
508
66.1k
    unsigned char* outend;
509
66.1k
    unsigned short* in = (unsigned short*) inb;
510
66.1k
    unsigned short* inend;
511
66.1k
    unsigned int c, d, inlen;
512
66.1k
    unsigned char *tmp;
513
66.1k
    int bits;
514
515
66.1k
    if (*outlen == 0) {
516
0
        *inlenb = 0;
517
0
        return(0);
518
0
    }
519
66.1k
    outend = out + *outlen;
520
66.1k
    if ((*inlenb % 2) == 1)
521
15.7k
        (*inlenb)--;
522
66.1k
    inlen = *inlenb / 2;
523
66.1k
    inend = in + inlen;
524
7.21M
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
525
7.15M
        if (xmlLittleEndian) {
526
7.15M
      c= *in++;
527
7.15M
  } else {
528
0
      tmp = (unsigned char *) in;
529
0
      c = *tmp++;
530
0
      c = c | (*tmp << 8);
531
0
      in++;
532
0
  }
533
7.15M
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
534
5.15k
      if (in >= inend) {           /* handle split mutli-byte characters */
535
2.51k
    break;
536
2.51k
      }
537
2.64k
      if (xmlLittleEndian) {
538
2.64k
    d = *in++;
539
2.64k
      } else {
540
0
    tmp = (unsigned char *) in;
541
0
    d = *tmp++;
542
0
    d = d | (*tmp << 8);
543
0
    in++;
544
0
      }
545
2.64k
            if ((d & 0xFC00) == 0xDC00) {
546
1.25k
                c &= 0x03FF;
547
1.25k
                c <<= 10;
548
1.25k
                c |= d & 0x03FF;
549
1.25k
                c += 0x10000;
550
1.25k
            }
551
1.38k
            else {
552
1.38k
    *outlen = out - outstart;
553
1.38k
    *inlenb = processed - inb;
554
1.38k
          return(-2);
555
1.38k
      }
556
2.64k
        }
557
558
  /* assertion: c is a single UTF-4 value */
559
7.15M
        if (out >= outend)
560
0
      break;
561
7.15M
        if      (c <    0x80) {  *out++=  c;                bits= -6; }
562
7.13M
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
563
7.12M
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
564
1.25k
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
565
566
21.4M
        for ( ; bits >= 0; bits-= 6) {
567
14.2M
            if (out >= outend)
568
0
          break;
569
14.2M
            *out++= ((c >> bits) & 0x3F) | 0x80;
570
14.2M
        }
571
7.15M
  processed = (const unsigned char*) in;
572
7.15M
    }
573
64.7k
    *outlen = out - outstart;
574
64.7k
    *inlenb = processed - inb;
575
64.7k
    return(*outlen);
576
66.1k
}
577
578
#ifdef LIBXML_OUTPUT_ENABLED
579
/**
580
 * UTF8ToUTF16LE:
581
 * @outb:  a pointer to an array of bytes to store the result
582
 * @outlen:  the length of @outb
583
 * @in:  a pointer to an array of UTF-8 chars
584
 * @inlen:  the length of @in
585
 *
586
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
587
 * block of chars out.
588
 *
589
 * Returns the number of bytes written, or -1 if lack of space, or -2
590
 *     if the transcoding failed.
591
 */
592
static int
593
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
594
            const unsigned char* in, int *inlen)
595
85.3k
{
596
85.3k
    unsigned short* out = (unsigned short*) outb;
597
85.3k
    const unsigned char* processed = in;
598
85.3k
    const unsigned char *const instart = in;
599
85.3k
    unsigned short* outstart= out;
600
85.3k
    unsigned short* outend;
601
85.3k
    const unsigned char* inend;
602
85.3k
    unsigned int c, d;
603
85.3k
    int trailing;
604
85.3k
    unsigned char *tmp;
605
85.3k
    unsigned short tmp1, tmp2;
606
607
    /* UTF16LE encoding has no BOM */
608
85.3k
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
609
85.3k
    if (in == NULL) {
610
37
  *outlen = 0;
611
37
  *inlen = 0;
612
37
  return(0);
613
37
    }
614
85.3k
    inend= in + *inlen;
615
85.3k
    outend = out + (*outlen / 2);
616
608k
    while (in < inend) {
617
605k
      d= *in++;
618
605k
      if      (d < 0x80)  { c= d; trailing= 0; }
619
106k
      else if (d < 0xC0) {
620
          /* trailing byte in leading position */
621
3.24k
    *outlen = (out - outstart) * 2;
622
3.24k
    *inlen = processed - instart;
623
3.24k
    return(-2);
624
103k
      } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
625
94.0k
      else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
626
88.2k
      else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
627
130
      else {
628
  /* no chance for this in UTF-16 */
629
130
  *outlen = (out - outstart) * 2;
630
130
  *inlen = processed - instart;
631
130
  return(-2);
632
130
      }
633
634
602k
      if (inend - in < trailing) {
635
33
          break;
636
33
      }
637
638
865k
      for ( ; trailing; trailing--) {
639
275k
          if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
640
12.8k
        break;
641
262k
          c <<= 6;
642
262k
          c |= d & 0x3F;
643
262k
      }
644
645
      /* assertion: c is a single UTF-4 value */
646
602k
        if (c < 0x10000) {
647
518k
            if (out >= outend)
648
0
          break;
649
518k
      if (xmlLittleEndian) {
650
518k
    *out++ = c;
651
518k
      } else {
652
0
    tmp = (unsigned char *) out;
653
0
    *tmp = (unsigned char) c; /* Explicit truncation */
654
0
    *(tmp + 1) = c >> 8 ;
655
0
    out++;
656
0
      }
657
518k
        }
658
83.6k
        else if (c < 0x110000) {
659
4.84k
            if (out+1 >= outend)
660
0
          break;
661
4.84k
            c -= 0x10000;
662
4.84k
      if (xmlLittleEndian) {
663
4.84k
    *out++ = 0xD800 | (c >> 10);
664
4.84k
    *out++ = 0xDC00 | (c & 0x03FF);
665
4.84k
      } else {
666
0
    tmp1 = 0xD800 | (c >> 10);
667
0
    tmp = (unsigned char *) out;
668
0
    *tmp = (unsigned char) tmp1; /* Explicit truncation */
669
0
    *(tmp + 1) = tmp1 >> 8;
670
0
    out++;
671
672
0
    tmp2 = 0xDC00 | (c & 0x03FF);
673
0
    tmp = (unsigned char *) out;
674
0
    *tmp  = (unsigned char) tmp2; /* Explicit truncation */
675
0
    *(tmp + 1) = tmp2 >> 8;
676
0
    out++;
677
0
      }
678
4.84k
        }
679
78.8k
        else
680
78.8k
      break;
681
523k
  processed = in;
682
523k
    }
683
81.9k
    *outlen = (out - outstart) * 2;
684
81.9k
    *inlen = processed - instart;
685
81.9k
    return(*outlen);
686
85.3k
}
687
688
/**
689
 * UTF8ToUTF16:
690
 * @outb:  a pointer to an array of bytes to store the result
691
 * @outlen:  the length of @outb
692
 * @in:  a pointer to an array of UTF-8 chars
693
 * @inlen:  the length of @in
694
 *
695
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
696
 * block of chars out.
697
 *
698
 * Returns the number of bytes written, or -1 if lack of space, or -2
699
 *     if the transcoding failed.
700
 */
701
static int
702
UTF8ToUTF16(unsigned char* outb, int *outlen,
703
            const unsigned char* in, int *inlen)
704
85.9k
{
705
85.9k
    if (in == NULL) {
706
  /*
707
   * initialization, add the Byte Order Mark for UTF-16LE
708
   */
709
699
        if (*outlen >= 2) {
710
699
      outb[0] = 0xFF;
711
699
      outb[1] = 0xFE;
712
699
      *outlen = 2;
713
699
      *inlen = 0;
714
#ifdef DEBUG_ENCODING
715
            xmlGenericError(xmlGenericErrorContext,
716
        "Added FFFE Byte Order Mark\n");
717
#endif
718
699
      return(2);
719
699
  }
720
0
  *outlen = 0;
721
0
  *inlen = 0;
722
0
  return(0);
723
699
    }
724
85.2k
    return (UTF8ToUTF16LE(outb, outlen, in, inlen));
725
85.9k
}
726
#endif /* LIBXML_OUTPUT_ENABLED */
727
728
/**
729
 * UTF16BEToUTF8:
730
 * @out:  a pointer to an array of bytes to store the result
731
 * @outlen:  the length of @out
732
 * @inb:  a pointer to an array of UTF-16 passed as a byte array
733
 * @inlenb:  the length of @in in UTF-16 chars
734
 *
735
 * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
736
 * block of chars out. This function assumes the endian property
737
 * is the same between the native type of this machine and the
738
 * inputed one.
739
 *
740
 * Returns the number of bytes written, or -1 if lack of space, or -2
741
 *     if the transcoding fails (if *in is not a valid utf16 string)
742
 * The value of *inlen after return is the number of octets consumed
743
 *     if the return value is positive, else unpredictable.
744
 */
745
static int
746
UTF16BEToUTF8(unsigned char* out, int *outlen,
747
            const unsigned char* inb, int *inlenb)
748
60.8k
{
749
60.8k
    unsigned char* outstart = out;
750
60.8k
    const unsigned char* processed = inb;
751
60.8k
    unsigned char* outend;
752
60.8k
    unsigned short* in = (unsigned short*) inb;
753
60.8k
    unsigned short* inend;
754
60.8k
    unsigned int c, d, inlen;
755
60.8k
    unsigned char *tmp;
756
60.8k
    int bits;
757
758
60.8k
    if (*outlen == 0) {
759
0
        *inlenb = 0;
760
0
        return(0);
761
0
    }
762
60.8k
    outend = out + *outlen;
763
60.8k
    if ((*inlenb % 2) == 1)
764
14.8k
        (*inlenb)--;
765
60.8k
    inlen = *inlenb / 2;
766
60.8k
    inend= in + inlen;
767
5.91M
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
768
5.86M
  if (xmlLittleEndian) {
769
5.86M
      tmp = (unsigned char *) in;
770
5.86M
      c = *tmp++;
771
5.86M
      c = (c << 8) | *tmp;
772
5.86M
      in++;
773
5.86M
  } else {
774
0
      c= *in++;
775
0
  }
776
5.86M
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
777
4.13k
      if (in >= inend) {           /* handle split mutli-byte characters */
778
1.90k
                break;
779
1.90k
      }
780
2.23k
      if (xmlLittleEndian) {
781
2.23k
    tmp = (unsigned char *) in;
782
2.23k
    d = *tmp++;
783
2.23k
    d = (d << 8) | *tmp;
784
2.23k
    in++;
785
2.23k
      } else {
786
0
    d= *in++;
787
0
      }
788
2.23k
            if ((d & 0xFC00) == 0xDC00) {
789
879
                c &= 0x03FF;
790
879
                c <<= 10;
791
879
                c |= d & 0x03FF;
792
879
                c += 0x10000;
793
879
            }
794
1.35k
            else {
795
1.35k
    *outlen = out - outstart;
796
1.35k
    *inlenb = processed - inb;
797
1.35k
          return(-2);
798
1.35k
      }
799
2.23k
        }
800
801
  /* assertion: c is a single UTF-4 value */
802
5.85M
        if (out >= outend)
803
0
      break;
804
5.85M
        if      (c <    0x80) {  *out++=  c;                bits= -6; }
805
5.84M
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
806
5.83M
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
807
879
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
808
809
17.5M
        for ( ; bits >= 0; bits-= 6) {
810
11.6M
            if (out >= outend)
811
0
          break;
812
11.6M
            *out++= ((c >> bits) & 0x3F) | 0x80;
813
11.6M
        }
814
5.85M
  processed = (const unsigned char*) in;
815
5.85M
    }
816
59.5k
    *outlen = out - outstart;
817
59.5k
    *inlenb = processed - inb;
818
59.5k
    return(*outlen);
819
60.8k
}
820
821
#ifdef LIBXML_OUTPUT_ENABLED
822
/**
823
 * UTF8ToUTF16BE:
824
 * @outb:  a pointer to an array of bytes to store the result
825
 * @outlen:  the length of @outb
826
 * @in:  a pointer to an array of UTF-8 chars
827
 * @inlen:  the length of @in
828
 *
829
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
830
 * block of chars out.
831
 *
832
 * Returns the number of byte written, or -1 by lack of space, or -2
833
 *     if the transcoding failed.
834
 */
835
static int
836
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
837
            const unsigned char* in, int *inlen)
838
94
{
839
94
    unsigned short* out = (unsigned short*) outb;
840
94
    const unsigned char* processed = in;
841
94
    const unsigned char *const instart = in;
842
94
    unsigned short* outstart= out;
843
94
    unsigned short* outend;
844
94
    const unsigned char* inend;
845
94
    unsigned int c, d;
846
94
    int trailing;
847
94
    unsigned char *tmp;
848
94
    unsigned short tmp1, tmp2;
849
850
    /* UTF-16BE has no BOM */
851
94
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
852
94
    if (in == NULL) {
853
47
  *outlen = 0;
854
47
  *inlen = 0;
855
47
  return(0);
856
47
    }
857
47
    inend= in + *inlen;
858
47
    outend = out + (*outlen / 2);
859
2.02k
    while (in < inend) {
860
1.97k
      d= *in++;
861
1.97k
      if      (d < 0x80)  { c= d; trailing= 0; }
862
0
      else if (d < 0xC0)  {
863
          /* trailing byte in leading position */
864
0
    *outlen = out - outstart;
865
0
    *inlen = processed - instart;
866
0
    return(-2);
867
0
      } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
868
0
      else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
869
0
      else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
870
0
      else {
871
          /* no chance for this in UTF-16 */
872
0
    *outlen = out - outstart;
873
0
    *inlen = processed - instart;
874
0
    return(-2);
875
0
      }
876
877
1.97k
      if (inend - in < trailing) {
878
0
          break;
879
0
      }
880
881
1.97k
      for ( ; trailing; trailing--) {
882
0
          if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))  break;
883
0
          c <<= 6;
884
0
          c |= d & 0x3F;
885
0
      }
886
887
      /* assertion: c is a single UTF-4 value */
888
1.97k
        if (c < 0x10000) {
889
1.97k
            if (out >= outend)  break;
890
1.97k
      if (xmlLittleEndian) {
891
1.97k
    tmp = (unsigned char *) out;
892
1.97k
    *tmp = c >> 8;
893
1.97k
    *(tmp + 1) = (unsigned char) c; /* Explicit truncation */
894
1.97k
    out++;
895
1.97k
      } else {
896
0
    *out++ = c;
897
0
      }
898
1.97k
        }
899
0
        else if (c < 0x110000) {
900
0
            if (out+1 >= outend)  break;
901
0
            c -= 0x10000;
902
0
      if (xmlLittleEndian) {
903
0
    tmp1 = 0xD800 | (c >> 10);
904
0
    tmp = (unsigned char *) out;
905
0
    *tmp = tmp1 >> 8;
906
0
    *(tmp + 1) = (unsigned char) tmp1; /* Explicit truncation */
907
0
    out++;
908
909
0
    tmp2 = 0xDC00 | (c & 0x03FF);
910
0
    tmp = (unsigned char *) out;
911
0
    *tmp = tmp2 >> 8;
912
0
    *(tmp + 1) = (unsigned char) tmp2; /* Explicit truncation */
913
0
    out++;
914
0
      } else {
915
0
    *out++ = 0xD800 | (c >> 10);
916
0
    *out++ = 0xDC00 | (c & 0x03FF);
917
0
      }
918
0
        }
919
0
        else
920
0
      break;
921
1.97k
  processed = in;
922
1.97k
    }
923
47
    *outlen = (out - outstart) * 2;
924
47
    *inlen = processed - instart;
925
47
    return(*outlen);
926
47
}
927
#endif /* LIBXML_OUTPUT_ENABLED */
928
929
/************************************************************************
930
 *                  *
931
 *    Generic encoding handling routines      *
932
 *                  *
933
 ************************************************************************/
934
935
/**
936
 * xmlDetectCharEncoding:
937
 * @in:  a pointer to the first bytes of the XML entity, must be at least
938
 *       2 bytes long (at least 4 if encoding is UTF4 variant).
939
 * @len:  pointer to the length of the buffer
940
 *
941
 * Guess the encoding of the entity using the first bytes of the entity content
942
 * according to the non-normative appendix F of the XML-1.0 recommendation.
943
 *
944
 * Returns one of the XML_CHAR_ENCODING_... values.
945
 */
946
xmlCharEncoding
947
xmlDetectCharEncoding(const unsigned char* in, int len)
948
883k
{
949
883k
    if (in == NULL)
950
0
        return(XML_CHAR_ENCODING_NONE);
951
883k
    if (len >= 4) {
952
883k
  if ((in[0] == 0x00) && (in[1] == 0x00) &&
953
883k
      (in[2] == 0x00) && (in[3] == 0x3C))
954
276
      return(XML_CHAR_ENCODING_UCS4BE);
955
883k
  if ((in[0] == 0x3C) && (in[1] == 0x00) &&
956
883k
      (in[2] == 0x00) && (in[3] == 0x00))
957
242
      return(XML_CHAR_ENCODING_UCS4LE);
958
883k
  if ((in[0] == 0x00) && (in[1] == 0x00) &&
959
883k
      (in[2] == 0x3C) && (in[3] == 0x00))
960
147
      return(XML_CHAR_ENCODING_UCS4_2143);
961
882k
  if ((in[0] == 0x00) && (in[1] == 0x3C) &&
962
882k
      (in[2] == 0x00) && (in[3] == 0x00))
963
126
      return(XML_CHAR_ENCODING_UCS4_3412);
964
882k
  if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
965
882k
      (in[2] == 0xA7) && (in[3] == 0x94))
966
2.53k
      return(XML_CHAR_ENCODING_EBCDIC);
967
880k
  if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
968
880k
      (in[2] == 0x78) && (in[3] == 0x6D))
969
369k
      return(XML_CHAR_ENCODING_UTF8);
970
  /*
971
   * Although not part of the recommendation, we also
972
   * attempt an "auto-recognition" of UTF-16LE and
973
   * UTF-16BE encodings.
974
   */
975
510k
  if ((in[0] == 0x3C) && (in[1] == 0x00) &&
976
510k
      (in[2] == 0x3F) && (in[3] == 0x00))
977
2.27k
      return(XML_CHAR_ENCODING_UTF16LE);
978
508k
  if ((in[0] == 0x00) && (in[1] == 0x3C) &&
979
508k
      (in[2] == 0x00) && (in[3] == 0x3F))
980
1.11k
      return(XML_CHAR_ENCODING_UTF16BE);
981
508k
    }
982
507k
    if (len >= 3) {
983
  /*
984
   * Errata on XML-1.0 June 20 2001
985
   * We now allow an UTF8 encoded BOM
986
   */
987
507k
  if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
988
507k
      (in[2] == 0xBF))
989
5.22k
      return(XML_CHAR_ENCODING_UTF8);
990
507k
    }
991
    /* For UTF-16 we can recognize by the BOM */
992
501k
    if (len >= 2) {
993
501k
  if ((in[0] == 0xFE) && (in[1] == 0xFF))
994
2.97k
      return(XML_CHAR_ENCODING_UTF16BE);
995
498k
  if ((in[0] == 0xFF) && (in[1] == 0xFE))
996
2.16k
      return(XML_CHAR_ENCODING_UTF16LE);
997
498k
    }
998
496k
    return(XML_CHAR_ENCODING_NONE);
999
501k
}
1000
1001
/**
1002
 * xmlCleanupEncodingAliases:
1003
 *
1004
 * Unregisters all aliases
1005
 */
1006
void
1007
0
xmlCleanupEncodingAliases(void) {
1008
0
    int i;
1009
1010
0
    if (xmlCharEncodingAliases == NULL)
1011
0
  return;
1012
1013
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1014
0
  if (xmlCharEncodingAliases[i].name != NULL)
1015
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1016
0
  if (xmlCharEncodingAliases[i].alias != NULL)
1017
0
      xmlFree((char *) xmlCharEncodingAliases[i].alias);
1018
0
    }
1019
0
    xmlCharEncodingAliasesNb = 0;
1020
0
    xmlCharEncodingAliasesMax = 0;
1021
0
    xmlFree(xmlCharEncodingAliases);
1022
0
    xmlCharEncodingAliases = NULL;
1023
0
}
1024
1025
/**
1026
 * xmlGetEncodingAlias:
1027
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1028
 *
1029
 * Lookup an encoding name for the given alias.
1030
 *
1031
 * Returns NULL if not found, otherwise the original name
1032
 */
1033
const char *
1034
80.0k
xmlGetEncodingAlias(const char *alias) {
1035
80.0k
    int i;
1036
80.0k
    char upper[100];
1037
1038
80.0k
    if (alias == NULL)
1039
0
  return(NULL);
1040
1041
80.0k
    if (xmlCharEncodingAliases == NULL)
1042
80.0k
  return(NULL);
1043
1044
0
    for (i = 0;i < 99;i++) {
1045
0
        upper[i] = toupper(alias[i]);
1046
0
  if (upper[i] == 0) break;
1047
0
    }
1048
0
    upper[i] = 0;
1049
1050
    /*
1051
     * Walk down the list looking for a definition of the alias
1052
     */
1053
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1054
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1055
0
      return(xmlCharEncodingAliases[i].name);
1056
0
  }
1057
0
    }
1058
0
    return(NULL);
1059
0
}
1060
1061
/**
1062
 * xmlAddEncodingAlias:
1063
 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1064
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1065
 *
1066
 * Registers an alias @alias for an encoding named @name. Existing alias
1067
 * will be overwritten.
1068
 *
1069
 * Returns 0 in case of success, -1 in case of error
1070
 */
1071
int
1072
0
xmlAddEncodingAlias(const char *name, const char *alias) {
1073
0
    int i;
1074
0
    char upper[100];
1075
1076
0
    if ((name == NULL) || (alias == NULL))
1077
0
  return(-1);
1078
1079
0
    for (i = 0;i < 99;i++) {
1080
0
        upper[i] = toupper(alias[i]);
1081
0
  if (upper[i] == 0) break;
1082
0
    }
1083
0
    upper[i] = 0;
1084
1085
0
    if (xmlCharEncodingAliases == NULL) {
1086
0
  xmlCharEncodingAliasesNb = 0;
1087
0
  xmlCharEncodingAliasesMax = 20;
1088
0
  xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1089
0
        xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1090
0
  if (xmlCharEncodingAliases == NULL)
1091
0
      return(-1);
1092
0
    } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1093
0
  xmlCharEncodingAliasesMax *= 2;
1094
0
  xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1095
0
        xmlRealloc(xmlCharEncodingAliases,
1096
0
             xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1097
0
    }
1098
    /*
1099
     * Walk down the list looking for a definition of the alias
1100
     */
1101
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1102
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1103
      /*
1104
       * Replace the definition.
1105
       */
1106
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1107
0
      xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1108
0
      return(0);
1109
0
  }
1110
0
    }
1111
    /*
1112
     * Add the definition
1113
     */
1114
0
    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1115
0
    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1116
0
    xmlCharEncodingAliasesNb++;
1117
0
    return(0);
1118
0
}
1119
1120
/**
1121
 * xmlDelEncodingAlias:
1122
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1123
 *
1124
 * Unregisters an encoding alias @alias
1125
 *
1126
 * Returns 0 in case of success, -1 in case of error
1127
 */
1128
int
1129
0
xmlDelEncodingAlias(const char *alias) {
1130
0
    int i;
1131
1132
0
    if (alias == NULL)
1133
0
  return(-1);
1134
1135
0
    if (xmlCharEncodingAliases == NULL)
1136
0
  return(-1);
1137
    /*
1138
     * Walk down the list looking for a definition of the alias
1139
     */
1140
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1141
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1142
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1143
0
      xmlFree((char *) xmlCharEncodingAliases[i].alias);
1144
0
      xmlCharEncodingAliasesNb--;
1145
0
      memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1146
0
        sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1147
0
      return(0);
1148
0
  }
1149
0
    }
1150
0
    return(-1);
1151
0
}
1152
1153
/**
1154
 * xmlParseCharEncoding:
1155
 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1156
 *
1157
 * Compare the string to the encoding schemes already known. Note
1158
 * that the comparison is case insensitive accordingly to the section
1159
 * [XML] 4.3.3 Character Encoding in Entities.
1160
 *
1161
 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1162
 * if not recognized.
1163
 */
1164
xmlCharEncoding
1165
xmlParseCharEncoding(const char* name)
1166
121k
{
1167
121k
    const char *alias;
1168
121k
    char upper[500];
1169
121k
    int i;
1170
1171
121k
    if (name == NULL)
1172
104k
  return(XML_CHAR_ENCODING_NONE);
1173
1174
    /*
1175
     * Do the alias resolution
1176
     */
1177
17.4k
    alias = xmlGetEncodingAlias(name);
1178
17.4k
    if (alias != NULL)
1179
0
  name = alias;
1180
1181
188k
    for (i = 0;i < 499;i++) {
1182
188k
        upper[i] = toupper(name[i]);
1183
188k
  if (upper[i] == 0) break;
1184
188k
    }
1185
17.4k
    upper[i] = 0;
1186
1187
17.4k
    if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1188
17.4k
    if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1189
13.8k
    if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1190
1191
    /*
1192
     * NOTE: if we were able to parse this, the endianness of UTF16 is
1193
     *       already found and in use
1194
     */
1195
13.7k
    if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1196
13.0k
    if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1197
1198
13.0k
    if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1199
13.0k
    if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1200
13.0k
    if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1201
1202
    /*
1203
     * NOTE: if we were able to parse this, the endianness of UCS4 is
1204
     *       already found and in use
1205
     */
1206
13.0k
    if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1207
12.3k
    if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1208
12.3k
    if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1209
1210
1211
12.3k
    if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1212
9.17k
    if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1213
9.17k
    if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1214
1215
9.17k
    if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1216
9.14k
    if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1217
9.14k
    if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1218
1219
9.14k
    if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1220
9.10k
    if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1221
9.08k
    if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1222
8.87k
    if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1223
8.84k
    if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1224
8.82k
    if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1225
8.80k
    if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1226
1227
8.78k
    if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1228
8.78k
    if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1229
8.78k
    if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1230
1231
#ifdef DEBUG_ENCODING
1232
    xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1233
#endif
1234
8.55k
    return(XML_CHAR_ENCODING_ERROR);
1235
8.78k
}
1236
1237
/**
1238
 * xmlGetCharEncodingName:
1239
 * @enc:  the encoding
1240
 *
1241
 * The "canonical" name for XML encoding.
1242
 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1243
 * Section 4.3.3  Character Encoding in Entities
1244
 *
1245
 * Returns the canonical name for the given encoding
1246
 */
1247
1248
const char*
1249
939
xmlGetCharEncodingName(xmlCharEncoding enc) {
1250
939
    switch (enc) {
1251
0
        case XML_CHAR_ENCODING_ERROR:
1252
0
      return(NULL);
1253
0
        case XML_CHAR_ENCODING_NONE:
1254
0
      return(NULL);
1255
0
        case XML_CHAR_ENCODING_UTF8:
1256
0
      return("UTF-8");
1257
0
        case XML_CHAR_ENCODING_UTF16LE:
1258
0
      return("UTF-16");
1259
0
        case XML_CHAR_ENCODING_UTF16BE:
1260
0
      return("UTF-16");
1261
0
        case XML_CHAR_ENCODING_EBCDIC:
1262
0
            return("EBCDIC");
1263
666
        case XML_CHAR_ENCODING_UCS4LE:
1264
666
            return("ISO-10646-UCS-4");
1265
0
        case XML_CHAR_ENCODING_UCS4BE:
1266
0
            return("ISO-10646-UCS-4");
1267
147
        case XML_CHAR_ENCODING_UCS4_2143:
1268
147
            return("ISO-10646-UCS-4");
1269
126
        case XML_CHAR_ENCODING_UCS4_3412:
1270
126
            return("ISO-10646-UCS-4");
1271
0
        case XML_CHAR_ENCODING_UCS2:
1272
0
            return("ISO-10646-UCS-2");
1273
0
        case XML_CHAR_ENCODING_8859_1:
1274
0
      return("ISO-8859-1");
1275
0
        case XML_CHAR_ENCODING_8859_2:
1276
0
      return("ISO-8859-2");
1277
0
        case XML_CHAR_ENCODING_8859_3:
1278
0
      return("ISO-8859-3");
1279
0
        case XML_CHAR_ENCODING_8859_4:
1280
0
      return("ISO-8859-4");
1281
0
        case XML_CHAR_ENCODING_8859_5:
1282
0
      return("ISO-8859-5");
1283
0
        case XML_CHAR_ENCODING_8859_6:
1284
0
      return("ISO-8859-6");
1285
0
        case XML_CHAR_ENCODING_8859_7:
1286
0
      return("ISO-8859-7");
1287
0
        case XML_CHAR_ENCODING_8859_8:
1288
0
      return("ISO-8859-8");
1289
0
        case XML_CHAR_ENCODING_8859_9:
1290
0
      return("ISO-8859-9");
1291
0
        case XML_CHAR_ENCODING_2022_JP:
1292
0
            return("ISO-2022-JP");
1293
0
        case XML_CHAR_ENCODING_SHIFT_JIS:
1294
0
            return("Shift-JIS");
1295
0
        case XML_CHAR_ENCODING_EUC_JP:
1296
0
            return("EUC-JP");
1297
0
  case XML_CHAR_ENCODING_ASCII:
1298
0
      return(NULL);
1299
939
    }
1300
0
    return(NULL);
1301
939
}
1302
1303
/************************************************************************
1304
 *                  *
1305
 *      Char encoding handlers        *
1306
 *                  *
1307
 ************************************************************************/
1308
1309
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) && \
1310
    defined(LIBXML_ISO8859X_ENABLED)
1311
1312
#define DECLARE_ISO_FUNCS(n) \
1313
    static int ISO8859_##n##ToUTF8(unsigned char* out, int *outlen, \
1314
                                   const unsigned char* in, int *inlen); \
1315
    static int UTF8ToISO8859_##n(unsigned char* out, int *outlen, \
1316
                                 const unsigned char* in, int *inlen);
1317
1318
/** DOC_DISABLE */
1319
DECLARE_ISO_FUNCS(2)
1320
DECLARE_ISO_FUNCS(3)
1321
DECLARE_ISO_FUNCS(4)
1322
DECLARE_ISO_FUNCS(5)
1323
DECLARE_ISO_FUNCS(6)
1324
DECLARE_ISO_FUNCS(7)
1325
DECLARE_ISO_FUNCS(8)
1326
DECLARE_ISO_FUNCS(9)
1327
DECLARE_ISO_FUNCS(10)
1328
DECLARE_ISO_FUNCS(11)
1329
DECLARE_ISO_FUNCS(13)
1330
DECLARE_ISO_FUNCS(14)
1331
DECLARE_ISO_FUNCS(15)
1332
DECLARE_ISO_FUNCS(16)
1333
/** DOC_ENABLE */
1334
1335
#endif /* LIBXML_ISO8859X_ENABLED */
1336
1337
#ifdef LIBXML_ICONV_ENABLED
1338
  #define EMPTY_ICONV , (iconv_t) 0, (iconv_t) 0
1339
#else
1340
  #define EMPTY_ICONV
1341
#endif
1342
1343
#ifdef LIBXML_UCONV_ENABLED
1344
  #define EMPTY_UCONV , NULL, NULL
1345
#else
1346
  #define EMPTY_UCONV
1347
#endif
1348
1349
#define MAKE_HANDLER(name, in, out) \
1350
    { (char *) name, in, out EMPTY_ICONV EMPTY_UCONV }
1351
1352
static const xmlCharEncodingHandler defaultHandlers[] = {
1353
    MAKE_HANDLER("UTF-8", UTF8ToUTF8, UTF8ToUTF8)
1354
#ifdef LIBXML_OUTPUT_ENABLED
1355
    ,MAKE_HANDLER("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE)
1356
    ,MAKE_HANDLER("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE)
1357
    ,MAKE_HANDLER("UTF-16", UTF16LEToUTF8, UTF8ToUTF16)
1358
    ,MAKE_HANDLER("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1)
1359
    ,MAKE_HANDLER("ASCII", asciiToUTF8, UTF8Toascii)
1360
    ,MAKE_HANDLER("US-ASCII", asciiToUTF8, UTF8Toascii)
1361
#ifdef LIBXML_HTML_ENABLED
1362
    ,MAKE_HANDLER("HTML", NULL, UTF8ToHtml)
1363
#endif
1364
#else
1365
    ,MAKE_HANDLER("UTF-16LE", UTF16LEToUTF8, NULL)
1366
    ,MAKE_HANDLER("UTF-16BE", UTF16BEToUTF8, NULL)
1367
    ,MAKE_HANDLER("UTF-16", UTF16LEToUTF8, NULL)
1368
    ,MAKE_HANDLER("ISO-8859-1", isolat1ToUTF8, NULL)
1369
    ,MAKE_HANDLER("ASCII", asciiToUTF8, NULL)
1370
    ,MAKE_HANDLER("US-ASCII", asciiToUTF8, NULL)
1371
#endif /* LIBXML_OUTPUT_ENABLED */
1372
1373
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) && \
1374
    defined(LIBXML_ISO8859X_ENABLED)
1375
    ,MAKE_HANDLER("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2)
1376
    ,MAKE_HANDLER("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3)
1377
    ,MAKE_HANDLER("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4)
1378
    ,MAKE_HANDLER("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5)
1379
    ,MAKE_HANDLER("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6)
1380
    ,MAKE_HANDLER("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7)
1381
    ,MAKE_HANDLER("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8)
1382
    ,MAKE_HANDLER("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9)
1383
    ,MAKE_HANDLER("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10)
1384
    ,MAKE_HANDLER("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11)
1385
    ,MAKE_HANDLER("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13)
1386
    ,MAKE_HANDLER("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14)
1387
    ,MAKE_HANDLER("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15)
1388
    ,MAKE_HANDLER("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16)
1389
#endif
1390
};
1391
1392
#define NUM_DEFAULT_HANDLERS \
1393
728k
    (sizeof(defaultHandlers) / sizeof(defaultHandlers[0]))
1394
1395
static const xmlCharEncodingHandler *xmlUTF16LEHandler = &defaultHandlers[1];
1396
static const xmlCharEncodingHandler *xmlUTF16BEHandler = &defaultHandlers[2];
1397
1398
/* the size should be growable, but it's not a big deal ... */
1399
0
#define MAX_ENCODING_HANDLERS 50
1400
static xmlCharEncodingHandlerPtr *handlers = NULL;
1401
static int nbCharEncodingHandler = 0;
1402
1403
/**
1404
 * xmlNewCharEncodingHandler:
1405
 * @name:  the encoding name, in UTF-8 format (ASCII actually)
1406
 * @input:  the xmlCharEncodingInputFunc to read that encoding
1407
 * @output:  the xmlCharEncodingOutputFunc to write that encoding
1408
 *
1409
 * Create and registers an xmlCharEncodingHandler.
1410
 *
1411
 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1412
 */
1413
xmlCharEncodingHandlerPtr
1414
xmlNewCharEncodingHandler(const char *name,
1415
                          xmlCharEncodingInputFunc input,
1416
0
                          xmlCharEncodingOutputFunc output) {
1417
0
    xmlCharEncodingHandlerPtr handler;
1418
0
    const char *alias;
1419
0
    char upper[500];
1420
0
    int i;
1421
0
    char *up = NULL;
1422
1423
    /*
1424
     * Do the alias resolution
1425
     */
1426
0
    alias = xmlGetEncodingAlias(name);
1427
0
    if (alias != NULL)
1428
0
  name = alias;
1429
1430
    /*
1431
     * Keep only the uppercase version of the encoding.
1432
     */
1433
0
    if (name == NULL) {
1434
0
        xmlEncodingErr(XML_I18N_NO_NAME,
1435
0
           "xmlNewCharEncodingHandler : no name !\n", NULL);
1436
0
  return(NULL);
1437
0
    }
1438
0
    for (i = 0;i < 499;i++) {
1439
0
        upper[i] = toupper(name[i]);
1440
0
  if (upper[i] == 0) break;
1441
0
    }
1442
0
    upper[i] = 0;
1443
0
    up = xmlMemStrdup(upper);
1444
0
    if (up == NULL) {
1445
0
        xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1446
0
  return(NULL);
1447
0
    }
1448
1449
    /*
1450
     * allocate and fill-up an handler block.
1451
     */
1452
0
    handler = (xmlCharEncodingHandlerPtr)
1453
0
              xmlMalloc(sizeof(xmlCharEncodingHandler));
1454
0
    if (handler == NULL) {
1455
0
        xmlFree(up);
1456
0
        xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1457
0
  return(NULL);
1458
0
    }
1459
0
    memset(handler, 0, sizeof(xmlCharEncodingHandler));
1460
0
    handler->input = input;
1461
0
    handler->output = output;
1462
0
    handler->name = up;
1463
1464
0
#ifdef LIBXML_ICONV_ENABLED
1465
0
    handler->iconv_in = NULL;
1466
0
    handler->iconv_out = NULL;
1467
0
#endif
1468
#ifdef LIBXML_ICU_ENABLED
1469
    handler->uconv_in = NULL;
1470
    handler->uconv_out = NULL;
1471
#endif
1472
1473
    /*
1474
     * registers and returns the handler.
1475
     */
1476
0
    xmlRegisterCharEncodingHandler(handler);
1477
#ifdef DEBUG_ENCODING
1478
    xmlGenericError(xmlGenericErrorContext,
1479
      "Registered encoding handler for %s\n", name);
1480
#endif
1481
0
    return(handler);
1482
0
}
1483
1484
/**
1485
 * xmlInitCharEncodingHandlers:
1486
 *
1487
 * DEPRECATED: Alias for xmlInitParser.
1488
 */
1489
void
1490
0
xmlInitCharEncodingHandlers(void) {
1491
0
    xmlInitParser();
1492
0
}
1493
1494
/**
1495
 * xmlInitEncodingInternal:
1496
 *
1497
 * Initialize the char encoding support.
1498
 */
1499
void
1500
3.70k
xmlInitEncodingInternal(void) {
1501
3.70k
    unsigned short int tst = 0x1234;
1502
3.70k
    unsigned char *ptr = (unsigned char *) &tst;
1503
1504
3.70k
    if (*ptr == 0x12) xmlLittleEndian = 0;
1505
3.70k
    else if (*ptr == 0x34) xmlLittleEndian = 1;
1506
0
    else {
1507
0
        xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1508
0
                 "Odd problem at endianness detection\n", NULL);
1509
0
    }
1510
3.70k
}
1511
1512
/**
1513
 * xmlCleanupCharEncodingHandlers:
1514
 *
1515
 * DEPRECATED: This function will be made private. Call xmlCleanupParser
1516
 * to free global state but see the warnings there. xmlCleanupParser
1517
 * should be only called once at program exit. In most cases, you don't
1518
 * have call cleanup functions at all.
1519
 *
1520
 * Cleanup the memory allocated for the char encoding support, it
1521
 * unregisters all the encoding handlers and the aliases.
1522
 */
1523
void
1524
0
xmlCleanupCharEncodingHandlers(void) {
1525
0
    xmlCleanupEncodingAliases();
1526
1527
0
    if (handlers == NULL) return;
1528
1529
0
    for (;nbCharEncodingHandler > 0;) {
1530
0
        nbCharEncodingHandler--;
1531
0
  if (handlers[nbCharEncodingHandler] != NULL) {
1532
0
      if (handlers[nbCharEncodingHandler]->name != NULL)
1533
0
    xmlFree(handlers[nbCharEncodingHandler]->name);
1534
0
      xmlFree(handlers[nbCharEncodingHandler]);
1535
0
  }
1536
0
    }
1537
0
    xmlFree(handlers);
1538
0
    handlers = NULL;
1539
0
    nbCharEncodingHandler = 0;
1540
0
}
1541
1542
/**
1543
 * xmlRegisterCharEncodingHandler:
1544
 * @handler:  the xmlCharEncodingHandlerPtr handler block
1545
 *
1546
 * Register the char encoding handler, surprising, isn't it ?
1547
 */
1548
void
1549
0
xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1550
0
    if (handler == NULL) {
1551
0
        xmlEncodingErr(XML_I18N_NO_HANDLER,
1552
0
    "xmlRegisterCharEncodingHandler: NULL handler\n", NULL);
1553
0
        return;
1554
0
    }
1555
0
    if (handlers == NULL) {
1556
0
        handlers = xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(handlers[0]));
1557
0
        if (handlers == NULL) {
1558
0
            xmlEncodingErrMemory("allocating handler table");
1559
0
            goto free_handler;
1560
0
        }
1561
0
    }
1562
1563
0
    if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1564
0
        xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1565
0
  "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1566
0
                 "MAX_ENCODING_HANDLERS");
1567
0
        goto free_handler;
1568
0
    }
1569
0
    handlers[nbCharEncodingHandler++] = handler;
1570
0
    return;
1571
1572
0
free_handler:
1573
0
    if (handler != NULL) {
1574
0
        if (handler->name != NULL) {
1575
0
            xmlFree(handler->name);
1576
0
        }
1577
0
        xmlFree(handler);
1578
0
    }
1579
0
}
1580
1581
/**
1582
 * xmlGetCharEncodingHandler:
1583
 * @enc:  an xmlCharEncoding value.
1584
 *
1585
 * Search in the registered set the handler able to read/write that encoding.
1586
 *
1587
 * Returns the handler or NULL if not found
1588
 */
1589
xmlCharEncodingHandlerPtr
1590
1.13M
xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1591
1.13M
    xmlCharEncodingHandlerPtr handler;
1592
1593
1.13M
    switch (enc) {
1594
0
        case XML_CHAR_ENCODING_ERROR:
1595
0
      return(NULL);
1596
1.01M
        case XML_CHAR_ENCODING_NONE:
1597
1.01M
      return(NULL);
1598
106k
        case XML_CHAR_ENCODING_UTF8:
1599
106k
      return(NULL);
1600
5.18k
        case XML_CHAR_ENCODING_UTF16LE:
1601
5.18k
      return((xmlCharEncodingHandlerPtr) xmlUTF16LEHandler);
1602
4.75k
        case XML_CHAR_ENCODING_UTF16BE:
1603
4.75k
      return((xmlCharEncodingHandlerPtr) xmlUTF16BEHandler);
1604
3.10k
        case XML_CHAR_ENCODING_EBCDIC:
1605
3.10k
            handler = xmlFindCharEncodingHandler("EBCDIC");
1606
3.10k
            if (handler != NULL) return(handler);
1607
3.10k
            handler = xmlFindCharEncodingHandler("ebcdic");
1608
3.10k
            if (handler != NULL) return(handler);
1609
3.10k
            handler = xmlFindCharEncodingHandler("EBCDIC-US");
1610
3.10k
            if (handler != NULL) return(handler);
1611
0
            handler = xmlFindCharEncodingHandler("IBM-037");
1612
0
            if (handler != NULL) return(handler);
1613
0
      break;
1614
364
        case XML_CHAR_ENCODING_UCS4BE:
1615
364
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1616
364
            if (handler != NULL) return(handler);
1617
364
            handler = xmlFindCharEncodingHandler("UCS-4");
1618
364
            if (handler != NULL) return(handler);
1619
0
            handler = xmlFindCharEncodingHandler("UCS4");
1620
0
            if (handler != NULL) return(handler);
1621
0
      break;
1622
302
        case XML_CHAR_ENCODING_UCS4LE:
1623
302
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1624
302
            if (handler != NULL) return(handler);
1625
302
            handler = xmlFindCharEncodingHandler("UCS-4");
1626
302
            if (handler != NULL) return(handler);
1627
0
            handler = xmlFindCharEncodingHandler("UCS4");
1628
0
            if (handler != NULL) return(handler);
1629
0
      break;
1630
179
        case XML_CHAR_ENCODING_UCS4_2143:
1631
179
      break;
1632
154
        case XML_CHAR_ENCODING_UCS4_3412:
1633
154
      break;
1634
0
        case XML_CHAR_ENCODING_UCS2:
1635
0
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1636
0
            if (handler != NULL) return(handler);
1637
0
            handler = xmlFindCharEncodingHandler("UCS-2");
1638
0
            if (handler != NULL) return(handler);
1639
0
            handler = xmlFindCharEncodingHandler("UCS2");
1640
0
            if (handler != NULL) return(handler);
1641
0
      break;
1642
1643
      /*
1644
       * We used to keep ISO Latin encodings native in the
1645
       * generated data. This led to so many problems that
1646
       * this has been removed. One can still change this
1647
       * back by registering no-ops encoders for those
1648
       */
1649
0
        case XML_CHAR_ENCODING_8859_1:
1650
0
      handler = xmlFindCharEncodingHandler("ISO-8859-1");
1651
0
      if (handler != NULL) return(handler);
1652
0
      break;
1653
0
        case XML_CHAR_ENCODING_8859_2:
1654
0
      handler = xmlFindCharEncodingHandler("ISO-8859-2");
1655
0
      if (handler != NULL) return(handler);
1656
0
      break;
1657
0
        case XML_CHAR_ENCODING_8859_3:
1658
0
      handler = xmlFindCharEncodingHandler("ISO-8859-3");
1659
0
      if (handler != NULL) return(handler);
1660
0
      break;
1661
0
        case XML_CHAR_ENCODING_8859_4:
1662
0
      handler = xmlFindCharEncodingHandler("ISO-8859-4");
1663
0
      if (handler != NULL) return(handler);
1664
0
      break;
1665
0
        case XML_CHAR_ENCODING_8859_5:
1666
0
      handler = xmlFindCharEncodingHandler("ISO-8859-5");
1667
0
      if (handler != NULL) return(handler);
1668
0
      break;
1669
0
        case XML_CHAR_ENCODING_8859_6:
1670
0
      handler = xmlFindCharEncodingHandler("ISO-8859-6");
1671
0
      if (handler != NULL) return(handler);
1672
0
      break;
1673
0
        case XML_CHAR_ENCODING_8859_7:
1674
0
      handler = xmlFindCharEncodingHandler("ISO-8859-7");
1675
0
      if (handler != NULL) return(handler);
1676
0
      break;
1677
0
        case XML_CHAR_ENCODING_8859_8:
1678
0
      handler = xmlFindCharEncodingHandler("ISO-8859-8");
1679
0
      if (handler != NULL) return(handler);
1680
0
      break;
1681
0
        case XML_CHAR_ENCODING_8859_9:
1682
0
      handler = xmlFindCharEncodingHandler("ISO-8859-9");
1683
0
      if (handler != NULL) return(handler);
1684
0
      break;
1685
1686
1687
0
        case XML_CHAR_ENCODING_2022_JP:
1688
0
            handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1689
0
            if (handler != NULL) return(handler);
1690
0
      break;
1691
0
        case XML_CHAR_ENCODING_SHIFT_JIS:
1692
0
            handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1693
0
            if (handler != NULL) return(handler);
1694
0
            handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1695
0
            if (handler != NULL) return(handler);
1696
0
            handler = xmlFindCharEncodingHandler("Shift_JIS");
1697
0
            if (handler != NULL) return(handler);
1698
0
      break;
1699
0
        case XML_CHAR_ENCODING_EUC_JP:
1700
0
            handler = xmlFindCharEncodingHandler("EUC-JP");
1701
0
            if (handler != NULL) return(handler);
1702
0
      break;
1703
0
  default:
1704
0
      break;
1705
1.13M
    }
1706
1707
#ifdef DEBUG_ENCODING
1708
    xmlGenericError(xmlGenericErrorContext,
1709
      "No handler found for encoding %d\n", enc);
1710
#endif
1711
333
    return(NULL);
1712
1.13M
}
1713
1714
/**
1715
 * xmlFindCharEncodingHandler:
1716
 * @name:  a string describing the char encoding.
1717
 *
1718
 * Search in the registered set the handler able to read/write that encoding
1719
 * or create a new one.
1720
 *
1721
 * Returns the handler or NULL if not found
1722
 */
1723
xmlCharEncodingHandlerPtr
1724
62.6k
xmlFindCharEncodingHandler(const char *name) {
1725
62.6k
    const char *nalias;
1726
62.6k
    const char *norig;
1727
62.6k
    xmlCharEncoding alias;
1728
62.6k
#ifdef LIBXML_ICONV_ENABLED
1729
62.6k
    xmlCharEncodingHandlerPtr enc;
1730
62.6k
    iconv_t icv_in, icv_out;
1731
62.6k
#endif /* LIBXML_ICONV_ENABLED */
1732
#ifdef LIBXML_ICU_ENABLED
1733
    xmlCharEncodingHandlerPtr encu;
1734
    uconv_t *ucv_in, *ucv_out;
1735
#endif /* LIBXML_ICU_ENABLED */
1736
62.6k
    char upper[100];
1737
62.6k
    int i;
1738
1739
62.6k
    if (name == NULL) return(NULL);
1740
62.6k
    if (name[0] == 0) return(NULL);
1741
1742
    /*
1743
     * Do the alias resolution
1744
     */
1745
62.6k
    norig = name;
1746
62.6k
    nalias = xmlGetEncodingAlias(name);
1747
62.6k
    if (nalias != NULL)
1748
0
  name = nalias;
1749
1750
    /*
1751
     * Check first for directly registered encoding names
1752
     */
1753
608k
    for (i = 0;i < 99;i++) {
1754
608k
        upper[i] = toupper(name[i]);
1755
608k
  if (upper[i] == 0) break;
1756
608k
    }
1757
62.6k
    upper[i] = 0;
1758
1759
387k
    for (i = 0; i < (int) NUM_DEFAULT_HANDLERS; i++) {
1760
368k
        if (strcmp(upper, defaultHandlers[i].name) == 0)
1761
42.8k
            return((xmlCharEncodingHandlerPtr) &defaultHandlers[i]);
1762
368k
    }
1763
1764
19.7k
    if (handlers != NULL) {
1765
0
        for (i = 0;i < nbCharEncodingHandler; i++) {
1766
0
            if (!strcmp(upper, handlers[i]->name)) {
1767
#ifdef DEBUG_ENCODING
1768
                xmlGenericError(xmlGenericErrorContext,
1769
                        "Found registered handler for encoding %s\n", name);
1770
#endif
1771
0
                return(handlers[i]);
1772
0
            }
1773
0
        }
1774
0
    }
1775
1776
19.7k
#ifdef LIBXML_ICONV_ENABLED
1777
    /* check whether iconv can handle this */
1778
19.7k
    icv_in = iconv_open("UTF-8", name);
1779
19.7k
    icv_out = iconv_open(name, "UTF-8");
1780
19.7k
    if (icv_in == (iconv_t) -1) {
1781
7.62k
        icv_in = iconv_open("UTF-8", upper);
1782
7.62k
    }
1783
19.7k
    if (icv_out == (iconv_t) -1) {
1784
7.62k
  icv_out = iconv_open(upper, "UTF-8");
1785
7.62k
    }
1786
19.7k
    if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1787
12.1k
      enc = (xmlCharEncodingHandlerPtr)
1788
12.1k
            xmlMalloc(sizeof(xmlCharEncodingHandler));
1789
12.1k
      if (enc == NULL) {
1790
0
          iconv_close(icv_in);
1791
0
          iconv_close(icv_out);
1792
0
    return(NULL);
1793
0
      }
1794
12.1k
            memset(enc, 0, sizeof(xmlCharEncodingHandler));
1795
12.1k
      enc->name = xmlMemStrdup(name);
1796
12.1k
      enc->input = NULL;
1797
12.1k
      enc->output = NULL;
1798
12.1k
      enc->iconv_in = icv_in;
1799
12.1k
      enc->iconv_out = icv_out;
1800
#ifdef DEBUG_ENCODING
1801
            xmlGenericError(xmlGenericErrorContext,
1802
        "Found iconv handler for encoding %s\n", name);
1803
#endif
1804
12.1k
      return enc;
1805
12.1k
    } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1806
0
      xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1807
0
        "iconv : problems with filters for '%s'\n", name);
1808
0
      if (icv_in != (iconv_t) -1)
1809
0
    iconv_close(icv_in);
1810
0
      else
1811
0
    iconv_close(icv_out);
1812
0
    }
1813
7.62k
#endif /* LIBXML_ICONV_ENABLED */
1814
#ifdef LIBXML_ICU_ENABLED
1815
    /* check whether icu can handle this */
1816
    ucv_in = openIcuConverter(name, 1);
1817
    ucv_out = openIcuConverter(name, 0);
1818
    if (ucv_in != NULL && ucv_out != NULL) {
1819
      encu = (xmlCharEncodingHandlerPtr)
1820
             xmlMalloc(sizeof(xmlCharEncodingHandler));
1821
      if (encu == NULL) {
1822
                closeIcuConverter(ucv_in);
1823
                closeIcuConverter(ucv_out);
1824
    return(NULL);
1825
      }
1826
            memset(encu, 0, sizeof(xmlCharEncodingHandler));
1827
      encu->name = xmlMemStrdup(name);
1828
      encu->input = NULL;
1829
      encu->output = NULL;
1830
      encu->uconv_in = ucv_in;
1831
      encu->uconv_out = ucv_out;
1832
#ifdef DEBUG_ENCODING
1833
            xmlGenericError(xmlGenericErrorContext,
1834
        "Found ICU converter handler for encoding %s\n", name);
1835
#endif
1836
      return encu;
1837
    } else if (ucv_in != NULL || ucv_out != NULL) {
1838
            closeIcuConverter(ucv_in);
1839
            closeIcuConverter(ucv_out);
1840
      xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1841
        "ICU converter : problems with filters for '%s'\n", name);
1842
    }
1843
#endif /* LIBXML_ICU_ENABLED */
1844
1845
#ifdef DEBUG_ENCODING
1846
    xmlGenericError(xmlGenericErrorContext,
1847
      "No handler found for encoding %s\n", name);
1848
#endif
1849
1850
    /*
1851
     * Fallback using the canonical names
1852
     */
1853
7.62k
    alias = xmlParseCharEncoding(norig);
1854
7.62k
    if (alias != XML_CHAR_ENCODING_ERROR) {
1855
666
        const char* canon;
1856
666
        canon = xmlGetCharEncodingName(alias);
1857
666
        if ((canon != NULL) && (strcmp(name, canon))) {
1858
0
      return(xmlFindCharEncodingHandler(canon));
1859
0
        }
1860
666
    }
1861
1862
    /* If "none of the above", give up */
1863
7.62k
    return(NULL);
1864
7.62k
}
1865
1866
/************************************************************************
1867
 *                  *
1868
 *    ICONV based generic conversion functions    *
1869
 *                  *
1870
 ************************************************************************/
1871
1872
#ifdef LIBXML_ICONV_ENABLED
1873
/**
1874
 * xmlIconvWrapper:
1875
 * @cd:   iconv converter data structure
1876
 * @out:  a pointer to an array of bytes to store the result
1877
 * @outlen:  the length of @out
1878
 * @in:  a pointer to an array of input bytes
1879
 * @inlen:  the length of @in
1880
 *
1881
 * Returns 0 if success, or
1882
 *     -1 by lack of space, or
1883
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
1884
 *        the result of transformation can't fit into the encoding we want), or
1885
 *     -3 if there the last byte can't form a single output char.
1886
 *
1887
 * The value of @inlen after return is the number of octets consumed
1888
 *     as the return value is positive, else unpredictable.
1889
 * The value of @outlen after return is the number of octets produced.
1890
 */
1891
static int
1892
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
1893
112k
                const unsigned char *in, int *inlen) {
1894
112k
    size_t icv_inlen, icv_outlen;
1895
112k
    const char *icv_in = (const char *) in;
1896
112k
    char *icv_out = (char *) out;
1897
112k
    size_t ret;
1898
1899
112k
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1900
1.12k
        if (outlen != NULL) *outlen = 0;
1901
1.12k
        return(-1);
1902
1.12k
    }
1903
111k
    icv_inlen = *inlen;
1904
111k
    icv_outlen = *outlen;
1905
    /*
1906
     * Some versions take const, other versions take non-const input.
1907
     */
1908
111k
    ret = iconv(cd, (void *) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
1909
111k
    *inlen -= icv_inlen;
1910
111k
    *outlen -= icv_outlen;
1911
111k
    if ((icv_inlen != 0) || (ret == (size_t) -1)) {
1912
15.9k
#ifdef EILSEQ
1913
15.9k
        if (errno == EILSEQ) {
1914
7.90k
            return -2;
1915
7.90k
        } else
1916
8.04k
#endif
1917
8.04k
#ifdef E2BIG
1918
8.04k
        if (errno == E2BIG) {
1919
359
            return -1;
1920
359
        } else
1921
7.68k
#endif
1922
7.68k
#ifdef EINVAL
1923
7.68k
        if (errno == EINVAL) {
1924
7.68k
            return -3;
1925
7.68k
        } else
1926
0
#endif
1927
0
        {
1928
0
            return -3;
1929
0
        }
1930
15.9k
    }
1931
95.6k
    return 0;
1932
111k
}
1933
#endif /* LIBXML_ICONV_ENABLED */
1934
1935
/************************************************************************
1936
 *                  *
1937
 *    ICU based generic conversion functions    *
1938
 *                  *
1939
 ************************************************************************/
1940
1941
#ifdef LIBXML_ICU_ENABLED
1942
/**
1943
 * xmlUconvWrapper:
1944
 * @cd: ICU uconverter data structure
1945
 * @toUnicode : non-zero if toUnicode. 0 otherwise.
1946
 * @out:  a pointer to an array of bytes to store the result
1947
 * @outlen:  the length of @out
1948
 * @in:  a pointer to an array of input bytes
1949
 * @inlen:  the length of @in
1950
 * @flush: if true, indicates end of input
1951
 *
1952
 * Returns 0 if success, or
1953
 *     -1 by lack of space, or
1954
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
1955
 *        the result of transformation can't fit into the encoding we want), or
1956
 *     -3 if there the last byte can't form a single output char.
1957
 *
1958
 * The value of @inlen after return is the number of octets consumed
1959
 *     as the return value is positive, else unpredictable.
1960
 * The value of @outlen after return is the number of octets produced.
1961
 */
1962
static int
1963
xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
1964
                const unsigned char *in, int *inlen, int flush) {
1965
    const char *ucv_in = (const char *) in;
1966
    char *ucv_out = (char *) out;
1967
    UErrorCode err = U_ZERO_ERROR;
1968
1969
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1970
        if (outlen != NULL) *outlen = 0;
1971
        return(-1);
1972
    }
1973
1974
    if (toUnicode) {
1975
        /* encoding => UTF-16 => UTF-8 */
1976
        ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
1977
                       &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1978
                       &cd->pivot_source, &cd->pivot_target,
1979
                       cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1980
    } else {
1981
        /* UTF-8 => UTF-16 => encoding */
1982
        ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
1983
                       &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1984
                       &cd->pivot_source, &cd->pivot_target,
1985
                       cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1986
    }
1987
    *inlen = ucv_in - (const char*) in;
1988
    *outlen = ucv_out - (char *) out;
1989
    if (U_SUCCESS(err)) {
1990
        /* reset pivot buf if this is the last call for input (flush==TRUE) */
1991
        if (flush)
1992
            cd->pivot_source = cd->pivot_target = cd->pivot_buf;
1993
        return 0;
1994
    }
1995
    if (err == U_BUFFER_OVERFLOW_ERROR)
1996
        return -1;
1997
    if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
1998
        return -2;
1999
    return -3;
2000
}
2001
#endif /* LIBXML_ICU_ENABLED */
2002
2003
/************************************************************************
2004
 *                  *
2005
 *    The real API used by libxml for on-the-fly conversion *
2006
 *                  *
2007
 ************************************************************************/
2008
2009
/**
2010
 * xmlEncInputChunk:
2011
 * @handler:  encoding handler
2012
 * @out:  a pointer to an array of bytes to store the result
2013
 * @outlen:  the length of @out
2014
 * @in:  a pointer to an array of input bytes
2015
 * @inlen:  the length of @in
2016
 * @flush:  flush (ICU-related)
2017
 *
2018
 * Returns 0 if success, or
2019
 *     -1 by lack of space, or
2020
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2021
 *        the result of transformation can't fit into the encoding we want), or
2022
 *     -3 if there the last byte can't form a single output char.
2023
 *
2024
 * The value of @inlen after return is the number of octets consumed
2025
 *     as the return value is 0, else unpredictable.
2026
 * The value of @outlen after return is the number of octets produced.
2027
 */
2028
static int
2029
xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2030
1.44M
                 int *outlen, const unsigned char *in, int *inlen, int flush) {
2031
1.44M
    int ret;
2032
1.44M
    (void)flush;
2033
2034
1.44M
    if (handler->input != NULL) {
2035
1.33M
        ret = handler->input(out, outlen, in, inlen);
2036
1.33M
        if (ret > 0)
2037
1.00M
           ret = 0;
2038
1.33M
    }
2039
106k
#ifdef LIBXML_ICONV_ENABLED
2040
106k
    else if (handler->iconv_in != NULL) {
2041
105k
        ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
2042
105k
    }
2043
246
#endif /* LIBXML_ICONV_ENABLED */
2044
#ifdef LIBXML_ICU_ENABLED
2045
    else if (handler->uconv_in != NULL) {
2046
        ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen,
2047
                              flush);
2048
    }
2049
#endif /* LIBXML_ICU_ENABLED */
2050
246
    else {
2051
246
        *outlen = 0;
2052
246
        *inlen = 0;
2053
246
        ret = -2;
2054
246
    }
2055
2056
1.44M
    return(ret);
2057
1.44M
}
2058
2059
/**
2060
 * xmlEncOutputChunk:
2061
 * @handler:  encoding handler
2062
 * @out:  a pointer to an array of bytes to store the result
2063
 * @outlen:  the length of @out
2064
 * @in:  a pointer to an array of input bytes
2065
 * @inlen:  the length of @in
2066
 *
2067
 * Returns 0 if success, or
2068
 *     -1 by lack of space, or
2069
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2070
 *        the result of transformation can't fit into the encoding we want), or
2071
 *     -3 if there the last byte can't form a single output char.
2072
 *     -4 if no output function was found.
2073
 *
2074
 * The value of @inlen after return is the number of octets consumed
2075
 *     as the return value is 0, else unpredictable.
2076
 * The value of @outlen after return is the number of octets produced.
2077
 */
2078
static int
2079
xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2080
237k
                  int *outlen, const unsigned char *in, int *inlen) {
2081
237k
    int ret;
2082
2083
237k
    if (handler->output != NULL) {
2084
230k
        ret = handler->output(out, outlen, in, inlen);
2085
230k
        if (ret > 0)
2086
85.5k
           ret = 0;
2087
230k
    }
2088
6.91k
#ifdef LIBXML_ICONV_ENABLED
2089
6.91k
    else if (handler->iconv_out != NULL) {
2090
6.91k
        ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
2091
6.91k
    }
2092
0
#endif /* LIBXML_ICONV_ENABLED */
2093
#ifdef LIBXML_ICU_ENABLED
2094
    else if (handler->uconv_out != NULL) {
2095
        ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen,
2096
                              1);
2097
    }
2098
#endif /* LIBXML_ICU_ENABLED */
2099
0
    else {
2100
0
        *outlen = 0;
2101
0
        *inlen = 0;
2102
0
        ret = -4;
2103
0
    }
2104
2105
237k
    return(ret);
2106
237k
}
2107
2108
/**
2109
 * xmlCharEncFirstLine:
2110
 * @handler:  char encoding transformation data structure
2111
 * @out:  an xmlBuffer for the output.
2112
 * @in:  an xmlBuffer for the input
2113
 *
2114
 * Front-end for the encoding handler input function, but handle only
2115
 * the very first line, i.e. limit itself to 45 chars.
2116
 *
2117
 * Returns the number of byte written if success, or
2118
 *     -1 general error
2119
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2120
 *        the result of transformation can't fit into the encoding we want), or
2121
 */
2122
int
2123
xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2124
0
                    xmlBufferPtr in) {
2125
0
    int ret;
2126
0
    int written;
2127
0
    int toconv;
2128
2129
0
    if (handler == NULL) return(-1);
2130
0
    if (out == NULL) return(-1);
2131
0
    if (in == NULL) return(-1);
2132
2133
    /* calculate space available */
2134
0
    written = out->size - out->use - 1; /* count '\0' */
2135
0
    toconv = in->use;
2136
    /*
2137
     * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2138
     * 45 chars should be sufficient to reach the end of the encoding
2139
     * declaration without going too far inside the document content.
2140
     * on UTF-16 this means 90bytes, on UCS4 this means 180
2141
     * The actual value depending on guessed encoding is passed as @len
2142
     * if provided
2143
     */
2144
0
    if (toconv > 180)
2145
0
        toconv = 180;
2146
0
    if (toconv * 2 >= written) {
2147
0
        xmlBufferGrow(out, toconv * 2);
2148
0
  written = out->size - out->use - 1;
2149
0
    }
2150
2151
0
    ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2152
0
                           in->content, &toconv, 0);
2153
0
    xmlBufferShrink(in, toconv);
2154
0
    out->use += written;
2155
0
    out->content[out->use] = 0;
2156
0
    if (ret == -1) ret = -3;
2157
2158
#ifdef DEBUG_ENCODING
2159
    switch (ret) {
2160
        case 0:
2161
      xmlGenericError(xmlGenericErrorContext,
2162
        "converted %d bytes to %d bytes of input\n",
2163
              toconv, written);
2164
      break;
2165
        case -1:
2166
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2167
              toconv, written, in->use);
2168
      break;
2169
        case -2:
2170
      xmlGenericError(xmlGenericErrorContext,
2171
        "input conversion failed due to input error\n");
2172
      break;
2173
        case -3:
2174
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2175
              toconv, written, in->use);
2176
      break;
2177
  default:
2178
      xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2179
    }
2180
#endif /* DEBUG_ENCODING */
2181
    /*
2182
     * Ignore when input buffer is not on a boundary
2183
     */
2184
0
    if (ret == -3) ret = 0;
2185
0
    if (ret == -1) ret = 0;
2186
0
    return(written ? written : ret);
2187
0
}
2188
2189
/**
2190
 * xmlCharEncFirstLineInput:
2191
 * @input: a parser input buffer
2192
 * @len:  number of bytes to convert for the first line, or -1
2193
 *
2194
 * Front-end for the encoding handler input function, but handle only
2195
 * the very first line. Point is that this is based on autodetection
2196
 * of the encoding and once that first line is converted we may find
2197
 * out that a different decoder is needed to process the input.
2198
 *
2199
 * Returns the number of byte written if success, or
2200
 *     -1 general error
2201
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2202
 *        the result of transformation can't fit into the encoding we want), or
2203
 */
2204
int
2205
xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
2206
49.6k
{
2207
49.6k
    int ret;
2208
49.6k
    size_t written;
2209
49.6k
    size_t toconv;
2210
49.6k
    int c_in;
2211
49.6k
    int c_out;
2212
49.6k
    xmlBufPtr in;
2213
49.6k
    xmlBufPtr out;
2214
2215
49.6k
    if ((input == NULL) || (input->encoder == NULL) ||
2216
49.6k
        (input->buffer == NULL) || (input->raw == NULL))
2217
0
        return (-1);
2218
49.6k
    out = input->buffer;
2219
49.6k
    in = input->raw;
2220
2221
49.6k
    toconv = xmlBufUse(in);
2222
49.6k
    if (toconv == 0)
2223
237
        return (0);
2224
49.4k
    written = xmlBufAvail(out);
2225
    /*
2226
     * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2227
     * 45 chars should be sufficient to reach the end of the encoding
2228
     * declaration without going too far inside the document content.
2229
     * on UTF-16 this means 90bytes, on UCS4 this means 180
2230
     * The actual value depending on guessed encoding is passed as @len
2231
     * if provided
2232
     */
2233
49.4k
    if (len >= 0) {
2234
9.47k
        if (toconv > (unsigned int) len)
2235
7.08k
            toconv = len;
2236
39.9k
    } else {
2237
39.9k
        if (toconv > 180)
2238
26.2k
            toconv = 180;
2239
39.9k
    }
2240
49.4k
    if (toconv * 2 >= written) {
2241
0
        xmlBufGrow(out, toconv * 2);
2242
0
        written = xmlBufAvail(out);
2243
0
    }
2244
49.4k
    if (written > 360)
2245
49.4k
        written = 360;
2246
2247
49.4k
    c_in = toconv;
2248
49.4k
    c_out = written;
2249
49.4k
    ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2250
49.4k
                           xmlBufContent(in), &c_in, 0);
2251
49.4k
    xmlBufShrink(in, c_in);
2252
49.4k
    xmlBufAddLen(out, c_out);
2253
49.4k
    if (ret == -1)
2254
2.98k
        ret = -3;
2255
2256
49.4k
    switch (ret) {
2257
43.1k
        case 0:
2258
#ifdef DEBUG_ENCODING
2259
            xmlGenericError(xmlGenericErrorContext,
2260
                            "converted %d bytes to %d bytes of input\n",
2261
                            c_in, c_out);
2262
#endif
2263
43.1k
            break;
2264
0
        case -1:
2265
#ifdef DEBUG_ENCODING
2266
            xmlGenericError(xmlGenericErrorContext,
2267
                         "converted %d bytes to %d bytes of input, %d left\n",
2268
                            c_in, c_out, (int)xmlBufUse(in));
2269
#endif
2270
0
            break;
2271
3.60k
        case -3:
2272
#ifdef DEBUG_ENCODING
2273
            xmlGenericError(xmlGenericErrorContext,
2274
                        "converted %d bytes to %d bytes of input, %d left\n",
2275
                            c_in, c_out, (int)xmlBufUse(in));
2276
#endif
2277
3.60k
            break;
2278
2.66k
        case -2: {
2279
2.66k
            char buf[50];
2280
2.66k
            const xmlChar *content = xmlBufContent(in);
2281
2282
2.66k
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2283
2.66k
         content[0], content[1],
2284
2.66k
         content[2], content[3]);
2285
2.66k
      buf[49] = 0;
2286
2.66k
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2287
2.66k
        "input conversion failed due to input error, bytes %s\n",
2288
2.66k
               buf);
2289
2.66k
        }
2290
49.4k
    }
2291
    /*
2292
     * Ignore when input buffer is not on a boundary
2293
     */
2294
49.4k
    if (ret == -3) ret = 0;
2295
49.4k
    if (ret == -1) ret = 0;
2296
49.4k
    return(c_out ? c_out : ret);
2297
49.4k
}
2298
2299
/**
2300
 * xmlCharEncInput:
2301
 * @input: a parser input buffer
2302
 * @flush: try to flush all the raw buffer
2303
 *
2304
 * Generic front-end for the encoding handler on parser input
2305
 *
2306
 * Returns the number of byte written if success, or
2307
 *     -1 general error
2308
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2309
 *        the result of transformation can't fit into the encoding we want), or
2310
 */
2311
int
2312
xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
2313
1.78M
{
2314
1.78M
    int ret;
2315
1.78M
    size_t written;
2316
1.78M
    size_t toconv;
2317
1.78M
    int c_in;
2318
1.78M
    int c_out;
2319
1.78M
    xmlBufPtr in;
2320
1.78M
    xmlBufPtr out;
2321
2322
1.78M
    if ((input == NULL) || (input->encoder == NULL) ||
2323
1.78M
        (input->buffer == NULL) || (input->raw == NULL))
2324
0
        return (-1);
2325
1.78M
    out = input->buffer;
2326
1.78M
    in = input->raw;
2327
2328
1.78M
    toconv = xmlBufUse(in);
2329
1.78M
    if (toconv == 0)
2330
393k
        return (0);
2331
1.39M
    if ((toconv > 64 * 1024) && (flush == 0))
2332
12
        toconv = 64 * 1024;
2333
1.39M
    written = xmlBufAvail(out);
2334
1.39M
    if (toconv * 2 >= written) {
2335
77.3k
        if (xmlBufGrow(out, toconv * 2) < 0)
2336
0
            return (-1);
2337
77.3k
        written = xmlBufAvail(out);
2338
77.3k
    }
2339
1.39M
    if ((written > 128 * 1024) && (flush == 0))
2340
18
        written = 128 * 1024;
2341
2342
1.39M
    c_in = toconv;
2343
1.39M
    c_out = written;
2344
1.39M
    ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2345
1.39M
                           xmlBufContent(in), &c_in, flush);
2346
1.39M
    xmlBufShrink(in, c_in);
2347
1.39M
    xmlBufAddLen(out, c_out);
2348
1.39M
    if (ret == -1)
2349
299k
        ret = -3;
2350
2351
1.39M
    switch (ret) {
2352
1.08M
        case 0:
2353
#ifdef DEBUG_ENCODING
2354
            xmlGenericError(xmlGenericErrorContext,
2355
                            "converted %d bytes to %d bytes of input\n",
2356
                            c_in, c_out);
2357
#endif
2358
1.08M
            break;
2359
0
        case -1:
2360
#ifdef DEBUG_ENCODING
2361
            xmlGenericError(xmlGenericErrorContext,
2362
                         "converted %d bytes to %d bytes of input, %d left\n",
2363
                            c_in, c_out, (int)xmlBufUse(in));
2364
#endif
2365
0
            break;
2366
306k
        case -3:
2367
#ifdef DEBUG_ENCODING
2368
            xmlGenericError(xmlGenericErrorContext,
2369
                        "converted %d bytes to %d bytes of input, %d left\n",
2370
                            c_in, c_out, (int)xmlBufUse(in));
2371
#endif
2372
306k
            break;
2373
6.50k
        case -2: {
2374
6.50k
            char buf[50];
2375
6.50k
            const xmlChar *content = xmlBufContent(in);
2376
2377
6.50k
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2378
6.50k
         content[0], content[1],
2379
6.50k
         content[2], content[3]);
2380
6.50k
      buf[49] = 0;
2381
6.50k
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2382
6.50k
        "input conversion failed due to input error, bytes %s\n",
2383
6.50k
               buf);
2384
6.50k
        }
2385
1.39M
    }
2386
    /*
2387
     * Ignore when input buffer is not on a boundary
2388
     */
2389
1.39M
    if (ret == -3)
2390
306k
        ret = 0;
2391
1.39M
    return (c_out? c_out : ret);
2392
1.39M
}
2393
2394
/**
2395
 * xmlCharEncInFunc:
2396
 * @handler:  char encoding transformation data structure
2397
 * @out:  an xmlBuffer for the output.
2398
 * @in:  an xmlBuffer for the input
2399
 *
2400
 * Generic front-end for the encoding handler input function
2401
 *
2402
 * Returns the number of byte written if success, or
2403
 *     -1 general error
2404
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2405
 *        the result of transformation can't fit into the encoding we want), or
2406
 */
2407
int
2408
xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2409
                 xmlBufferPtr in)
2410
0
{
2411
0
    int ret;
2412
0
    int written;
2413
0
    int toconv;
2414
2415
0
    if (handler == NULL)
2416
0
        return (-1);
2417
0
    if (out == NULL)
2418
0
        return (-1);
2419
0
    if (in == NULL)
2420
0
        return (-1);
2421
2422
0
    toconv = in->use;
2423
0
    if (toconv == 0)
2424
0
        return (0);
2425
0
    written = out->size - out->use -1; /* count '\0' */
2426
0
    if (toconv * 2 >= written) {
2427
0
        xmlBufferGrow(out, out->size + toconv * 2);
2428
0
        written = out->size - out->use - 1;
2429
0
    }
2430
0
    ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2431
0
                           in->content, &toconv, 1);
2432
0
    xmlBufferShrink(in, toconv);
2433
0
    out->use += written;
2434
0
    out->content[out->use] = 0;
2435
0
    if (ret == -1)
2436
0
        ret = -3;
2437
2438
0
    switch (ret) {
2439
0
        case 0:
2440
#ifdef DEBUG_ENCODING
2441
            xmlGenericError(xmlGenericErrorContext,
2442
                            "converted %d bytes to %d bytes of input\n",
2443
                            toconv, written);
2444
#endif
2445
0
            break;
2446
0
        case -1:
2447
#ifdef DEBUG_ENCODING
2448
            xmlGenericError(xmlGenericErrorContext,
2449
                         "converted %d bytes to %d bytes of input, %d left\n",
2450
                            toconv, written, in->use);
2451
#endif
2452
0
            break;
2453
0
        case -3:
2454
#ifdef DEBUG_ENCODING
2455
            xmlGenericError(xmlGenericErrorContext,
2456
                        "converted %d bytes to %d bytes of input, %d left\n",
2457
                            toconv, written, in->use);
2458
#endif
2459
0
            break;
2460
0
        case -2: {
2461
0
            char buf[50];
2462
2463
0
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2464
0
         in->content[0], in->content[1],
2465
0
         in->content[2], in->content[3]);
2466
0
      buf[49] = 0;
2467
0
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2468
0
        "input conversion failed due to input error, bytes %s\n",
2469
0
               buf);
2470
0
        }
2471
0
    }
2472
    /*
2473
     * Ignore when input buffer is not on a boundary
2474
     */
2475
0
    if (ret == -3)
2476
0
        ret = 0;
2477
0
    return (written? written : ret);
2478
0
}
2479
2480
#ifdef LIBXML_OUTPUT_ENABLED
2481
/**
2482
 * xmlCharEncOutput:
2483
 * @output: a parser output buffer
2484
 * @init: is this an initialization call without data
2485
 *
2486
 * Generic front-end for the encoding handler on parser output
2487
 * a first call with @init == 1 has to be made first to initiate the
2488
 * output in case of non-stateless encoding needing to initiate their
2489
 * state or the output (like the BOM in UTF16).
2490
 * In case of UTF8 sequence conversion errors for the given encoder,
2491
 * the content will be automatically remapped to a CharRef sequence.
2492
 *
2493
 * Returns the number of byte written if success, or
2494
 *     -1 general error
2495
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2496
 *        the result of transformation can't fit into the encoding we want), or
2497
 */
2498
int
2499
xmlCharEncOutput(xmlOutputBufferPtr output, int init)
2500
129k
{
2501
129k
    int ret;
2502
129k
    size_t written;
2503
129k
    int writtentot = 0;
2504
129k
    size_t toconv;
2505
129k
    int c_in;
2506
129k
    int c_out;
2507
129k
    xmlBufPtr in;
2508
129k
    xmlBufPtr out;
2509
2510
129k
    if ((output == NULL) || (output->encoder == NULL) ||
2511
129k
        (output->buffer == NULL) || (output->conv == NULL))
2512
0
        return (-1);
2513
129k
    out = output->conv;
2514
129k
    in = output->buffer;
2515
2516
187k
retry:
2517
2518
187k
    written = xmlBufAvail(out);
2519
2520
    /*
2521
     * First specific handling of the initialization call
2522
     */
2523
187k
    if (init) {
2524
9.86k
        c_in = 0;
2525
9.86k
        c_out = written;
2526
        /* TODO: Check return value. */
2527
9.86k
        xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2528
9.86k
                          NULL, &c_in);
2529
9.86k
        xmlBufAddLen(out, c_out);
2530
#ifdef DEBUG_ENCODING
2531
  xmlGenericError(xmlGenericErrorContext,
2532
    "initialized encoder\n");
2533
#endif
2534
9.86k
        return(c_out);
2535
9.86k
    }
2536
2537
    /*
2538
     * Conversion itself.
2539
     */
2540
178k
    toconv = xmlBufUse(in);
2541
178k
    if (toconv == 0)
2542
9.11k
        return (writtentot);
2543
168k
    if (toconv > 64 * 1024)
2544
3
        toconv = 64 * 1024;
2545
168k
    if (toconv * 4 >= written) {
2546
8.45k
        xmlBufGrow(out, toconv * 4);
2547
8.45k
        written = xmlBufAvail(out);
2548
8.45k
    }
2549
168k
    if (written > 256 * 1024)
2550
360
        written = 256 * 1024;
2551
2552
168k
    c_in = toconv;
2553
168k
    c_out = written;
2554
168k
    ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2555
168k
                            xmlBufContent(in), &c_in);
2556
168k
    xmlBufShrink(in, c_in);
2557
168k
    xmlBufAddLen(out, c_out);
2558
168k
    writtentot += c_out;
2559
168k
    if (ret == -1) {
2560
0
        if (c_out > 0) {
2561
            /* Can be a limitation of iconv or uconv */
2562
0
            goto retry;
2563
0
        }
2564
0
        ret = -3;
2565
0
    }
2566
2567
    /*
2568
     * Attempt to handle error cases
2569
     */
2570
168k
    switch (ret) {
2571
109k
        case 0:
2572
#ifdef DEBUG_ENCODING
2573
      xmlGenericError(xmlGenericErrorContext,
2574
        "converted %d bytes to %d bytes of output\n",
2575
              c_in, c_out);
2576
#endif
2577
109k
      break;
2578
0
        case -1:
2579
#ifdef DEBUG_ENCODING
2580
      xmlGenericError(xmlGenericErrorContext,
2581
        "output conversion failed by lack of space\n");
2582
#endif
2583
0
      break;
2584
327
        case -3:
2585
#ifdef DEBUG_ENCODING
2586
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2587
              c_in, c_out, (int) xmlBufUse(in));
2588
#endif
2589
327
      break;
2590
0
        case -4:
2591
0
            xmlEncodingErr(XML_I18N_NO_OUTPUT,
2592
0
                           "xmlCharEncOutFunc: no output function !\n", NULL);
2593
0
            ret = -1;
2594
0
            break;
2595
59.5k
        case -2: {
2596
59.5k
      xmlChar charref[20];
2597
59.5k
      int len = xmlBufUse(in);
2598
59.5k
            xmlChar *content = xmlBufContent(in);
2599
59.5k
      int cur, charrefLen;
2600
2601
59.5k
      cur = xmlGetUTF8Char(content, &len);
2602
59.5k
      if (cur <= 0)
2603
1.33k
                break;
2604
2605
#ifdef DEBUG_ENCODING
2606
            xmlGenericError(xmlGenericErrorContext,
2607
                    "handling output conversion error\n");
2608
            xmlGenericError(xmlGenericErrorContext,
2609
                    "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2610
                    content[0], content[1],
2611
                    content[2], content[3]);
2612
#endif
2613
            /*
2614
             * Removes the UTF8 sequence, and replace it by a charref
2615
             * and continue the transcoding phase, hoping the error
2616
             * did not mangle the encoder state.
2617
             */
2618
58.2k
            charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2619
58.2k
                             "&#%d;", cur);
2620
58.2k
            xmlBufShrink(in, len);
2621
58.2k
            xmlBufGrow(out, charrefLen * 4);
2622
58.2k
            c_out = xmlBufAvail(out);
2623
58.2k
            c_in = charrefLen;
2624
58.2k
            ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2625
58.2k
                                    charref, &c_in);
2626
2627
58.2k
      if ((ret < 0) || (c_in != charrefLen)) {
2628
0
    char buf[50];
2629
2630
0
    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2631
0
       content[0], content[1],
2632
0
       content[2], content[3]);
2633
0
    buf[49] = 0;
2634
0
    xmlEncodingErr(XML_I18N_CONV_FAILED,
2635
0
        "output conversion failed due to conv error, bytes %s\n",
2636
0
             buf);
2637
0
    content[0] = ' ';
2638
0
                break;
2639
0
      }
2640
2641
58.2k
            xmlBufAddLen(out, c_out);
2642
58.2k
            writtentot += c_out;
2643
58.2k
            goto retry;
2644
58.2k
  }
2645
168k
    }
2646
110k
    return(writtentot ? writtentot : ret);
2647
168k
}
2648
#endif
2649
2650
/**
2651
 * xmlCharEncOutFunc:
2652
 * @handler:  char encoding transformation data structure
2653
 * @out:  an xmlBuffer for the output.
2654
 * @in:  an xmlBuffer for the input
2655
 *
2656
 * Generic front-end for the encoding handler output function
2657
 * a first call with @in == NULL has to be made firs to initiate the
2658
 * output in case of non-stateless encoding needing to initiate their
2659
 * state or the output (like the BOM in UTF16).
2660
 * In case of UTF8 sequence conversion errors for the given encoder,
2661
 * the content will be automatically remapped to a CharRef sequence.
2662
 *
2663
 * Returns the number of byte written if success, or
2664
 *     -1 general error
2665
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2666
 *        the result of transformation can't fit into the encoding we want), or
2667
 */
2668
int
2669
xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2670
0
                  xmlBufferPtr in) {
2671
0
    int ret;
2672
0
    int written;
2673
0
    int writtentot = 0;
2674
0
    int toconv;
2675
2676
0
    if (handler == NULL) return(-1);
2677
0
    if (out == NULL) return(-1);
2678
2679
0
retry:
2680
2681
0
    written = out->size - out->use;
2682
2683
0
    if (written > 0)
2684
0
  written--; /* Gennady: count '/0' */
2685
2686
    /*
2687
     * First specific handling of in = NULL, i.e. the initialization call
2688
     */
2689
0
    if (in == NULL) {
2690
0
        toconv = 0;
2691
        /* TODO: Check return value. */
2692
0
        xmlEncOutputChunk(handler, &out->content[out->use], &written,
2693
0
                          NULL, &toconv);
2694
0
        out->use += written;
2695
0
        out->content[out->use] = 0;
2696
#ifdef DEBUG_ENCODING
2697
  xmlGenericError(xmlGenericErrorContext,
2698
    "initialized encoder\n");
2699
#endif
2700
0
        return(0);
2701
0
    }
2702
2703
    /*
2704
     * Conversion itself.
2705
     */
2706
0
    toconv = in->use;
2707
0
    if (toconv == 0)
2708
0
  return(0);
2709
0
    if (toconv * 4 >= written) {
2710
0
        xmlBufferGrow(out, toconv * 4);
2711
0
  written = out->size - out->use - 1;
2712
0
    }
2713
0
    ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2714
0
                            in->content, &toconv);
2715
0
    xmlBufferShrink(in, toconv);
2716
0
    out->use += written;
2717
0
    writtentot += written;
2718
0
    out->content[out->use] = 0;
2719
0
    if (ret == -1) {
2720
0
        if (written > 0) {
2721
            /* Can be a limitation of iconv or uconv */
2722
0
            goto retry;
2723
0
        }
2724
0
        ret = -3;
2725
0
    }
2726
2727
    /*
2728
     * Attempt to handle error cases
2729
     */
2730
0
    switch (ret) {
2731
0
        case 0:
2732
#ifdef DEBUG_ENCODING
2733
      xmlGenericError(xmlGenericErrorContext,
2734
        "converted %d bytes to %d bytes of output\n",
2735
              toconv, written);
2736
#endif
2737
0
      break;
2738
0
        case -1:
2739
#ifdef DEBUG_ENCODING
2740
      xmlGenericError(xmlGenericErrorContext,
2741
        "output conversion failed by lack of space\n");
2742
#endif
2743
0
      break;
2744
0
        case -3:
2745
#ifdef DEBUG_ENCODING
2746
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2747
              toconv, written, in->use);
2748
#endif
2749
0
      break;
2750
0
        case -4:
2751
0
      xmlEncodingErr(XML_I18N_NO_OUTPUT,
2752
0
               "xmlCharEncOutFunc: no output function !\n", NULL);
2753
0
      ret = -1;
2754
0
            break;
2755
0
        case -2: {
2756
0
      xmlChar charref[20];
2757
0
      int len = in->use;
2758
0
      const xmlChar *utf = (const xmlChar *) in->content;
2759
0
      int cur, charrefLen;
2760
2761
0
      cur = xmlGetUTF8Char(utf, &len);
2762
0
      if (cur <= 0)
2763
0
                break;
2764
2765
#ifdef DEBUG_ENCODING
2766
            xmlGenericError(xmlGenericErrorContext,
2767
                    "handling output conversion error\n");
2768
            xmlGenericError(xmlGenericErrorContext,
2769
                    "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2770
                    in->content[0], in->content[1],
2771
                    in->content[2], in->content[3]);
2772
#endif
2773
            /*
2774
             * Removes the UTF8 sequence, and replace it by a charref
2775
             * and continue the transcoding phase, hoping the error
2776
             * did not mangle the encoder state.
2777
             */
2778
0
            charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2779
0
                             "&#%d;", cur);
2780
0
            xmlBufferShrink(in, len);
2781
0
            xmlBufferGrow(out, charrefLen * 4);
2782
0
      written = out->size - out->use - 1;
2783
0
            toconv = charrefLen;
2784
0
            ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2785
0
                                    charref, &toconv);
2786
2787
0
      if ((ret < 0) || (toconv != charrefLen)) {
2788
0
    char buf[50];
2789
2790
0
    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2791
0
       in->content[0], in->content[1],
2792
0
       in->content[2], in->content[3]);
2793
0
    buf[49] = 0;
2794
0
    xmlEncodingErr(XML_I18N_CONV_FAILED,
2795
0
        "output conversion failed due to conv error, bytes %s\n",
2796
0
             buf);
2797
0
    in->content[0] = ' ';
2798
0
          break;
2799
0
      }
2800
2801
0
            out->use += written;
2802
0
            writtentot += written;
2803
0
            out->content[out->use] = 0;
2804
0
            goto retry;
2805
0
  }
2806
0
    }
2807
0
    return(writtentot ? writtentot : ret);
2808
0
}
2809
2810
/**
2811
 * xmlCharEncCloseFunc:
2812
 * @handler:  char encoding transformation data structure
2813
 *
2814
 * Generic front-end for encoding handler close function
2815
 *
2816
 * Returns 0 if success, or -1 in case of error
2817
 */
2818
int
2819
63.5k
xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2820
63.5k
    int ret = 0;
2821
63.5k
    int tofree = 0;
2822
63.5k
    int i = 0;
2823
2824
63.5k
    if (handler == NULL) return(-1);
2825
63.5k
    if (handler->name == NULL) return(-1);
2826
2827
340k
    for (i = 0; i < (int) NUM_DEFAULT_HANDLERS; i++) {
2828
328k
        if (handler == &defaultHandlers[i])
2829
51.3k
            return(0);
2830
328k
    }
2831
2832
12.1k
    if (handlers != NULL) {
2833
0
        for (i = 0;i < nbCharEncodingHandler; i++) {
2834
0
            if (handler == handlers[i])
2835
0
                return(0);
2836
0
  }
2837
0
    }
2838
12.1k
#ifdef LIBXML_ICONV_ENABLED
2839
    /*
2840
     * Iconv handlers can be used only once, free the whole block.
2841
     * and the associated icon resources.
2842
     */
2843
12.1k
    if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
2844
12.1k
        tofree = 1;
2845
12.1k
  if (handler->iconv_out != NULL) {
2846
12.1k
      if (iconv_close(handler->iconv_out))
2847
0
    ret = -1;
2848
12.1k
      handler->iconv_out = NULL;
2849
12.1k
  }
2850
12.1k
  if (handler->iconv_in != NULL) {
2851
12.1k
      if (iconv_close(handler->iconv_in))
2852
0
    ret = -1;
2853
12.1k
      handler->iconv_in = NULL;
2854
12.1k
  }
2855
12.1k
    }
2856
12.1k
#endif /* LIBXML_ICONV_ENABLED */
2857
#ifdef LIBXML_ICU_ENABLED
2858
    if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) {
2859
        tofree = 1;
2860
  if (handler->uconv_out != NULL) {
2861
      closeIcuConverter(handler->uconv_out);
2862
      handler->uconv_out = NULL;
2863
  }
2864
  if (handler->uconv_in != NULL) {
2865
      closeIcuConverter(handler->uconv_in);
2866
      handler->uconv_in = NULL;
2867
  }
2868
    }
2869
#endif
2870
12.1k
    if (tofree) {
2871
        /* free up only dynamic handlers iconv/uconv */
2872
12.1k
        if (handler->name != NULL)
2873
12.1k
            xmlFree(handler->name);
2874
12.1k
        handler->name = NULL;
2875
12.1k
        xmlFree(handler);
2876
12.1k
    }
2877
#ifdef DEBUG_ENCODING
2878
    if (ret)
2879
        xmlGenericError(xmlGenericErrorContext,
2880
    "failed to close the encoding handler\n");
2881
    else
2882
        xmlGenericError(xmlGenericErrorContext,
2883
    "closed the encoding handler\n");
2884
#endif
2885
2886
12.1k
    return(ret);
2887
12.1k
}
2888
2889
/**
2890
 * xmlByteConsumed:
2891
 * @ctxt: an XML parser context
2892
 *
2893
 * This function provides the current index of the parser relative
2894
 * to the start of the current entity. This function is computed in
2895
 * bytes from the beginning starting at zero and finishing at the
2896
 * size in byte of the file if parsing a file. The function is
2897
 * of constant cost if the input is UTF-8 but can be costly if run
2898
 * on non-UTF-8 input.
2899
 *
2900
 * Returns the index in bytes from the beginning of the entity or -1
2901
 *         in case the index could not be computed.
2902
 */
2903
long
2904
0
xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2905
0
    xmlParserInputPtr in;
2906
2907
0
    if (ctxt == NULL) return(-1);
2908
0
    in = ctxt->input;
2909
0
    if (in == NULL)  return(-1);
2910
0
    if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2911
0
        unsigned int unused = 0;
2912
0
  xmlCharEncodingHandler * handler = in->buf->encoder;
2913
        /*
2914
   * Encoding conversion, compute the number of unused original
2915
   * bytes from the input not consumed and subtract that from
2916
   * the raw consumed value, this is not a cheap operation
2917
   */
2918
0
        if (in->end - in->cur > 0) {
2919
0
      unsigned char convbuf[32000];
2920
0
      const unsigned char *cur = (const unsigned char *)in->cur;
2921
0
      int toconv = in->end - in->cur, written = 32000;
2922
2923
0
      int ret;
2924
2925
0
            do {
2926
0
                toconv = in->end - cur;
2927
0
                written = 32000;
2928
0
                ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2929
0
                                        cur, &toconv);
2930
0
                if (ret < 0) {
2931
0
                    if (written > 0)
2932
0
                        ret = -2;
2933
0
                    else
2934
0
                        return(-1);
2935
0
                }
2936
0
                unused += written;
2937
0
                cur += toconv;
2938
0
            } while (ret == -2);
2939
0
  }
2940
0
  if (in->buf->rawconsumed < unused)
2941
0
      return(-1);
2942
0
  return(in->buf->rawconsumed - unused);
2943
0
    }
2944
0
    return(in->consumed + (in->cur - in->base));
2945
0
}
2946
2947
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2948
#ifdef LIBXML_ISO8859X_ENABLED
2949
2950
/**
2951
 * UTF8ToISO8859x:
2952
 * @out:  a pointer to an array of bytes to store the result
2953
 * @outlen:  the length of @out
2954
 * @in:  a pointer to an array of UTF-8 chars
2955
 * @inlen:  the length of @in
2956
 * @xlattable: the 2-level transcoding table
2957
 *
2958
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
2959
 * block of chars out.
2960
 *
2961
 * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
2962
 * The value of @inlen after return is the number of octets consumed
2963
 *     as the return value is positive, else unpredictable.
2964
 * The value of @outlen after return is the number of octets consumed.
2965
 */
2966
static int
2967
UTF8ToISO8859x(unsigned char* out, int *outlen,
2968
              const unsigned char* in, int *inlen,
2969
              const unsigned char* const xlattable) {
2970
    const unsigned char* outstart = out;
2971
    const unsigned char* inend;
2972
    const unsigned char* instart = in;
2973
    const unsigned char* processed = in;
2974
2975
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
2976
        (xlattable == NULL))
2977
  return(-1);
2978
    if (in == NULL) {
2979
        /*
2980
        * initialization nothing to do
2981
        */
2982
        *outlen = 0;
2983
        *inlen = 0;
2984
        return(0);
2985
    }
2986
    inend = in + (*inlen);
2987
    while (in < inend) {
2988
        unsigned char d = *in++;
2989
        if  (d < 0x80)  {
2990
            *out++ = d;
2991
        } else if (d < 0xC0) {
2992
            /* trailing byte in leading position */
2993
            *outlen = out - outstart;
2994
            *inlen = processed - instart;
2995
            return(-2);
2996
        } else if (d < 0xE0) {
2997
            unsigned char c;
2998
            if (!(in < inend)) {
2999
                /* trailing byte not in input buffer */
3000
                *outlen = out - outstart;
3001
                *inlen = processed - instart;
3002
                return(-3);
3003
            }
3004
            c = *in++;
3005
            if ((c & 0xC0) != 0x80) {
3006
                /* not a trailing byte */
3007
                *outlen = out - outstart;
3008
                *inlen = processed - instart;
3009
                return(-2);
3010
            }
3011
            c = c & 0x3F;
3012
            d = d & 0x1F;
3013
            d = xlattable [48 + c + xlattable [d] * 64];
3014
            if (d == 0) {
3015
                /* not in character set */
3016
                *outlen = out - outstart;
3017
                *inlen = processed - instart;
3018
                return(-2);
3019
            }
3020
            *out++ = d;
3021
        } else if (d < 0xF0) {
3022
            unsigned char c1;
3023
            unsigned char c2;
3024
            if (!(in < inend - 1)) {
3025
                /* trailing bytes not in input buffer */
3026
                *outlen = out - outstart;
3027
                *inlen = processed - instart;
3028
                return(-3);
3029
            }
3030
            c1 = *in++;
3031
            if ((c1 & 0xC0) != 0x80) {
3032
                /* not a trailing byte (c1) */
3033
                *outlen = out - outstart;
3034
                *inlen = processed - instart;
3035
                return(-2);
3036
            }
3037
            c2 = *in++;
3038
            if ((c2 & 0xC0) != 0x80) {
3039
                /* not a trailing byte (c2) */
3040
                *outlen = out - outstart;
3041
                *inlen = processed - instart;
3042
                return(-2);
3043
            }
3044
            c1 = c1 & 0x3F;
3045
            c2 = c2 & 0x3F;
3046
      d = d & 0x0F;
3047
      d = xlattable [48 + c2 + xlattable [48 + c1 +
3048
      xlattable [32 + d] * 64] * 64];
3049
            if (d == 0) {
3050
                /* not in character set */
3051
                *outlen = out - outstart;
3052
                *inlen = processed - instart;
3053
                return(-2);
3054
            }
3055
            *out++ = d;
3056
        } else {
3057
            /* cannot transcode >= U+010000 */
3058
            *outlen = out - outstart;
3059
            *inlen = processed - instart;
3060
            return(-2);
3061
        }
3062
        processed = in;
3063
    }
3064
    *outlen = out - outstart;
3065
    *inlen = processed - instart;
3066
    return(*outlen);
3067
}
3068
3069
/**
3070
 * ISO8859xToUTF8
3071
 * @out:  a pointer to an array of bytes to store the result
3072
 * @outlen:  the length of @out
3073
 * @in:  a pointer to an array of ISO Latin 1 chars
3074
 * @inlen:  the length of @in
3075
 *
3076
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
3077
 * block of chars out.
3078
 * Returns 0 if success, or -1 otherwise
3079
 * The value of @inlen after return is the number of octets consumed
3080
 * The value of @outlen after return is the number of octets produced.
3081
 */
3082
static int
3083
ISO8859xToUTF8(unsigned char* out, int *outlen,
3084
              const unsigned char* in, int *inlen,
3085
              unsigned short const *unicodetable) {
3086
    unsigned char* outstart = out;
3087
    unsigned char* outend;
3088
    const unsigned char* instart = in;
3089
    const unsigned char* inend;
3090
    const unsigned char* instop;
3091
    unsigned int c;
3092
3093
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
3094
        (in == NULL) || (unicodetable == NULL))
3095
  return(-1);
3096
    outend = out + *outlen;
3097
    inend = in + *inlen;
3098
    instop = inend;
3099
3100
    while ((in < inend) && (out < outend - 2)) {
3101
        if (*in >= 0x80) {
3102
            c = unicodetable [*in - 0x80];
3103
            if (c == 0) {
3104
                /* undefined code point */
3105
                *outlen = out - outstart;
3106
                *inlen = in - instart;
3107
                return (-1);
3108
            }
3109
            if (c < 0x800) {
3110
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
3111
                *out++ = (c & 0x3F) | 0x80;
3112
            } else {
3113
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
3114
                *out++ = ((c >>  6) & 0x3F) | 0x80;
3115
                *out++ = (c & 0x3F) | 0x80;
3116
            }
3117
            ++in;
3118
        }
3119
        if (instop - in > outend - out) instop = in + (outend - out);
3120
        while ((*in < 0x80) && (in < instop)) {
3121
            *out++ = *in++;
3122
        }
3123
    }
3124
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
3125
        *out++ =  *in++;
3126
    }
3127
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
3128
        *out++ =  *in++;
3129
    }
3130
    *outlen = out - outstart;
3131
    *inlen = in - instart;
3132
    return (*outlen);
3133
}
3134
3135
3136
/************************************************************************
3137
 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
3138
 ************************************************************************/
3139
3140
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
3141
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3142
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3143
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3144
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3145
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
3146
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
3147
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
3148
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
3149
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
3150
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
3151
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
3152
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
3153
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
3154
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
3155
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
3156
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
3157
};
3158
3159
static const unsigned char xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
3160
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3161
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3162
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3163
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3164
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3165
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3166
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3167
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3168
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3169
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3170
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3171
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
3172
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
3173
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3174
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
3175
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3176
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
3177
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3178
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3179
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
3180
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
3181
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
3182
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
3183
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3184
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
3185
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
3186
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
3187
};
3188
3189
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
3190
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3191
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3192
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3193
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3194
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
3195
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
3196
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
3197
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
3198
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
3199
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3200
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
3201
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
3202
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
3203
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3204
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
3205
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
3206
};
3207
3208
static const unsigned char xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
3209
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3210
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3211
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3212
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3213
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3214
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3215
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3216
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3217
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3218
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3219
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
3220
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
3221
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
3222
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
3223
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3224
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3225
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
3226
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3227
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3228
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3229
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3230
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3231
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3232
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3233
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
3234
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
3235
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
3236
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3237
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3238
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3239
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
3240
};
3241
3242
static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
3243
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3244
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3245
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3246
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3247
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
3248
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
3249
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
3250
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
3251
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3252
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
3253
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3254
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
3255
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3256
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
3257
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3258
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
3259
};
3260
3261
static const unsigned char xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
3262
    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
3263
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3264
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3265
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3266
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3267
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3268
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3269
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3270
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3271
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
3272
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3273
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3274
    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3275
    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
3276
    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
3277
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
3278
    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
3279
    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
3280
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
3281
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3282
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
3283
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3284
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3285
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3286
    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
3287
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
3288
    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
3289
};
3290
3291
static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
3292
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3293
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3294
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3295
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3296
    0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
3297
    0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
3298
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
3299
    0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
3300
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
3301
    0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
3302
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
3303
    0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
3304
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
3305
    0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
3306
    0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
3307
    0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
3308
};
3309
3310
static const unsigned char xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
3311
    "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3312
    "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3313
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3314
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3315
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3316
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3317
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3318
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3319
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3320
    "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
3321
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3322
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
3323
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3324
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3325
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3326
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3327
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
3328
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3329
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3330
    "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3331
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3332
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3333
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3334
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3335
    "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3336
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3337
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3338
};
3339
3340
static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
3341
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3342
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3343
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3344
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3345
    0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
3346
    0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
3347
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3348
    0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
3349
    0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
3350
    0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3351
    0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3352
    0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3353
    0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3354
    0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3355
    0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3356
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3357
};
3358
3359
static const unsigned char xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
3360
    "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3361
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3362
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3363
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3364
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3365
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3366
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3367
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3368
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3369
    "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3370
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3371
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3372
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3373
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3374
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3375
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3376
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3377
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3378
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3379
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3380
    "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3381
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3382
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3383
};
3384
3385
static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
3386
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3387
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3388
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3389
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3390
    0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3391
    0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3392
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3393
    0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3394
    0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3395
    0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3396
    0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3397
    0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3398
    0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3399
    0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3400
    0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3401
    0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3402
};
3403
3404
static const unsigned char xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3405
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3406
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3407
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3408
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3409
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3410
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3411
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3412
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3413
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3414
    "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3415
    "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3416
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3417
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3418
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3419
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3420
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3421
    "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3422
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3423
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3424
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3425
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3426
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3427
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3428
    "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3429
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3430
    "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3431
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3432
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3433
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3434
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3435
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3436
};
3437
3438
static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
3439
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3440
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3441
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3442
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3443
    0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3444
    0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3445
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3446
    0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3447
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3448
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3449
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3450
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3451
    0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3452
    0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3453
    0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3454
    0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3455
};
3456
3457
static const unsigned char xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3458
    "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3459
    "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3460
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3461
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3462
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3463
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3464
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3465
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3466
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3467
    "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3468
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3469
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3470
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3471
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3472
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3473
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3474
    "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3475
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3476
    "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3477
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3478
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3479
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3480
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3481
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3482
    "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3483
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3484
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3485
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3486
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3487
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3488
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3489
};
3490
3491
static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
3492
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3493
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3494
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3495
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3496
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3497
    0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3498
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3499
    0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3500
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3501
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3502
    0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3503
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3504
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3505
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3506
    0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3507
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3508
};
3509
3510
static const unsigned char xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3511
    "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3512
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3513
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3514
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3515
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3516
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3517
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3518
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3519
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3520
    "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3521
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3522
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3523
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3524
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3525
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3526
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3527
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3528
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3529
    "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3530
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3531
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3532
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3533
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3534
};
3535
3536
static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3537
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3538
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3539
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3540
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3541
    0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3542
    0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3543
    0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3544
    0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3545
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3546
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3547
    0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3548
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3549
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3550
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3551
    0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3552
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3553
};
3554
3555
static const unsigned char xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3556
    "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3557
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3558
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3559
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3560
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3561
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3562
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3563
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3564
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3565
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3566
    "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3567
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3568
    "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3569
    "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3570
    "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3571
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3572
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3573
    "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3574
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3575
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3576
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3577
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3578
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3579
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3580
    "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3581
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3582
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3583
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3584
    "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3585
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3586
    "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3587
};
3588
3589
static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3590
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3591
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3592
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3593
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3594
    0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3595
    0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3596
    0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3597
    0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3598
    0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3599
    0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3600
    0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3601
    0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3602
    0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3603
    0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3604
    0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3605
    0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3606
};
3607
3608
static const unsigned char xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3609
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3610
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3611
    "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3612
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3613
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3614
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3615
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3616
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3617
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3618
    "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3619
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3620
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3621
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3622
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3623
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3624
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3625
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3626
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3627
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3628
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3629
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3630
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3631
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3632
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3633
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3634
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3635
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3636
};
3637
3638
static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3639
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3640
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3641
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3642
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3643
    0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3644
    0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3645
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3646
    0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3647
    0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3648
    0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3649
    0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3650
    0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3651
    0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3652
    0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3653
    0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3654
    0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3655
};
3656
3657
static const unsigned char xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3658
    "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3659
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3660
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3661
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3662
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3663
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3664
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3665
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3666
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3667
    "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3668
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3669
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3670
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3671
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3672
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3673
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3674
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3675
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3676
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3677
    "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3678
    "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3679
    "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3680
    "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3681
    "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3682
    "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3683
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3684
    "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3685
    "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3686
    "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3687
    "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3688
    "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3689
};
3690
3691
static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
3692
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3693
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3694
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3695
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3696
    0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3697
    0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3698
    0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3699
    0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3700
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3701
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3702
    0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3703
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3704
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3705
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3706
    0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3707
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
3708
};
3709
3710
static const unsigned char xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
3711
    "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3712
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3713
    "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3714
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3715
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3716
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3717
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3718
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3719
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3720
    "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3721
    "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3722
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3723
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3724
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3725
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3726
    "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3727
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3728
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3729
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3730
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3731
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3732
    "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3733
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3734
    "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3735
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3736
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3737
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3738
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3739
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3740
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3741
    "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3742
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3743
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3744
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3745
    "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3746
    "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3747
    "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3748
    "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3749
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3750
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3751
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3752
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3753
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3754
};
3755
3756
static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
3757
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3758
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3759
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3760
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3761
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3762
    0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3763
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3764
    0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3765
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3766
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3767
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3768
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3769
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3770
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3771
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3772
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
3773
};
3774
3775
static const unsigned char xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
3776
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3777
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3778
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3779
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3780
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3781
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3782
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3783
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3784
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3785
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3786
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3787
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3788
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3789
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3790
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3791
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3792
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3793
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3794
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3795
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3796
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3797
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3798
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3799
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3800
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3801
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3802
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
3803
};
3804
3805
static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
3806
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3807
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3808
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3809
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3810
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3811
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3812
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3813
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3814
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3815
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3816
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3817
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3818
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3819
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3820
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3821
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
3822
};
3823
3824
static const unsigned char xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
3825
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3826
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3827
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3828
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3829
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3830
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3831
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3832
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3833
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3834
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3835
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3836
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3837
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3838
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3839
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3840
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3841
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3842
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3843
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3844
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3845
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3846
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3847
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3848
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3849
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3850
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3851
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3852
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3853
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3854
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3855
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3856
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3857
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3858
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3859
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3860
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3861
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3862
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3863
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3864
};
3865
3866
3867
/*
3868
 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3869
 */
3870
3871
static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3872
    const unsigned char* in, int *inlen) {
3873
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3874
}
3875
static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3876
    const unsigned char* in, int *inlen) {
3877
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3878
}
3879
3880
static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3881
    const unsigned char* in, int *inlen) {
3882
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3883
}
3884
static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3885
    const unsigned char* in, int *inlen) {
3886
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3887
}
3888
3889
static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3890
    const unsigned char* in, int *inlen) {
3891
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3892
}
3893
static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3894
    const unsigned char* in, int *inlen) {
3895
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3896
}
3897
3898
static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3899
    const unsigned char* in, int *inlen) {
3900
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3901
}
3902
static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3903
    const unsigned char* in, int *inlen) {
3904
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3905
}
3906
3907
static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3908
    const unsigned char* in, int *inlen) {
3909
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3910
}
3911
static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3912
    const unsigned char* in, int *inlen) {
3913
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3914
}
3915
3916
static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3917
    const unsigned char* in, int *inlen) {
3918
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3919
}
3920
static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3921
    const unsigned char* in, int *inlen) {
3922
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3923
}
3924
3925
static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3926
    const unsigned char* in, int *inlen) {
3927
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3928
}
3929
static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3930
    const unsigned char* in, int *inlen) {
3931
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3932
}
3933
3934
static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3935
    const unsigned char* in, int *inlen) {
3936
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3937
}
3938
static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3939
    const unsigned char* in, int *inlen) {
3940
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3941
}
3942
3943
static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3944
    const unsigned char* in, int *inlen) {
3945
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3946
}
3947
static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3948
    const unsigned char* in, int *inlen) {
3949
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3950
}
3951
3952
static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3953
    const unsigned char* in, int *inlen) {
3954
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3955
}
3956
static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3957
    const unsigned char* in, int *inlen) {
3958
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3959
}
3960
3961
static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3962
    const unsigned char* in, int *inlen) {
3963
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3964
}
3965
static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3966
    const unsigned char* in, int *inlen) {
3967
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3968
}
3969
3970
static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3971
    const unsigned char* in, int *inlen) {
3972
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3973
}
3974
static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3975
    const unsigned char* in, int *inlen) {
3976
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3977
}
3978
3979
static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3980
    const unsigned char* in, int *inlen) {
3981
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3982
}
3983
static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3984
    const unsigned char* in, int *inlen) {
3985
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3986
}
3987
3988
static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3989
    const unsigned char* in, int *inlen) {
3990
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3991
}
3992
static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3993
    const unsigned char* in, int *inlen) {
3994
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3995
}
3996
3997
#endif
3998
#endif
3999