Coverage Report

Created: 2022-05-03 06:10

/src/libxml2/encoding.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * encoding.c : implements the encoding conversion functions needed for XML
3
 *
4
 * Related specs:
5
 * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6
 * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7
 * [ISO-10646]    UTF-8 and UTF-16 in Annexes
8
 * [ISO-8859-1]   ISO Latin-1 characters codes.
9
 * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
10
 *                Worldwide Character Encoding -- Version 1.0", Addison-
11
 *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
12
 *                described in Unicode Technical Report #4.
13
 * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
14
 *                Information Interchange, ANSI X3.4-1986.
15
 *
16
 * See Copyright for the status of this software.
17
 *
18
 * daniel@veillard.com
19
 *
20
 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
21
 */
22
23
#define IN_LIBXML
24
#include "libxml.h"
25
26
#include <string.h>
27
#include <limits.h>
28
#include <ctype.h>
29
#include <stdlib.h>
30
31
#ifdef LIBXML_ICONV_ENABLED
32
#include <errno.h>
33
#endif
34
35
#include <libxml/encoding.h>
36
#include <libxml/xmlmemory.h>
37
#ifdef LIBXML_HTML_ENABLED
38
#include <libxml/HTMLparser.h>
39
#endif
40
#include <libxml/globals.h>
41
#include <libxml/xmlerror.h>
42
43
#include "buf.h"
44
#include "enc.h"
45
46
#ifdef LIBXML_ICU_ENABLED
47
#include <unicode/ucnv.h>
48
/* Size of pivot buffer, same as icu/source/common/ucnv.cpp CHUNK_SIZE */
49
#define ICU_PIVOT_BUF_SIZE 1024
50
typedef struct _uconv_t uconv_t;
51
struct _uconv_t {
52
  UConverter *uconv; /* for conversion between an encoding and UTF-16 */
53
  UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */
54
  UChar      pivot_buf[ICU_PIVOT_BUF_SIZE];
55
  UChar      *pivot_source;
56
  UChar      *pivot_target;
57
};
58
#endif
59
60
static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
61
static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
62
63
typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
64
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
65
struct _xmlCharEncodingAlias {
66
    const char *name;
67
    const char *alias;
68
};
69
70
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
71
static int xmlCharEncodingAliasesNb = 0;
72
static int xmlCharEncodingAliasesMax = 0;
73
74
#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
75
#if 0
76
#define DEBUG_ENCODING  /* Define this to get encoding traces */
77
#endif
78
#else
79
#ifdef LIBXML_ISO8859X_ENABLED
80
static void xmlRegisterCharEncodingHandlersISO8859x (void);
81
#endif
82
#endif
83
84
static int xmlLittleEndian = 1;
85
86
/**
87
 * xmlEncodingErrMemory:
88
 * @extra:  extra information
89
 *
90
 * Handle an out of memory condition
91
 */
92
static void
93
xmlEncodingErrMemory(const char *extra)
94
0
{
95
0
    __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
96
0
}
97
98
/**
99
 * xmlEncodingErr:
100
 * @error:  the error number
101
 * @msg:  the error message
102
 *
103
 * Handle an encoding error
104
 */
105
static void LIBXML_ATTR_FORMAT(2,0)
106
xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
107
731
{
108
731
    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
109
731
                    XML_FROM_I18N, error, XML_ERR_FATAL,
110
731
                    NULL, 0, val, NULL, NULL, 0, 0, msg, val);
111
731
}
112
113
#ifdef LIBXML_ICU_ENABLED
114
static uconv_t*
115
openIcuConverter(const char* name, int toUnicode)
116
{
117
  UErrorCode status = U_ZERO_ERROR;
118
  uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
119
  if (conv == NULL)
120
    return NULL;
121
122
  conv->pivot_source = conv->pivot_buf;
123
  conv->pivot_target = conv->pivot_buf;
124
125
  conv->uconv = ucnv_open(name, &status);
126
  if (U_FAILURE(status))
127
    goto error;
128
129
  status = U_ZERO_ERROR;
130
  if (toUnicode) {
131
    ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
132
                        NULL, NULL, NULL, &status);
133
  }
134
  else {
135
    ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
136
                        NULL, NULL, NULL, &status);
137
  }
138
  if (U_FAILURE(status))
139
    goto error;
140
141
  status = U_ZERO_ERROR;
142
  conv->utf8 = ucnv_open("UTF-8", &status);
143
  if (U_SUCCESS(status))
144
    return conv;
145
146
error:
147
  if (conv->uconv)
148
    ucnv_close(conv->uconv);
149
  xmlFree(conv);
150
  return NULL;
151
}
152
153
static void
154
closeIcuConverter(uconv_t *conv)
155
{
156
  if (conv != NULL) {
157
    ucnv_close(conv->uconv);
158
    ucnv_close(conv->utf8);
159
    xmlFree(conv);
160
  }
161
}
162
#endif /* LIBXML_ICU_ENABLED */
163
164
/************************************************************************
165
 *                  *
166
 *    Conversions To/From UTF8 encoding     *
167
 *                  *
168
 ************************************************************************/
169
170
/**
171
 * asciiToUTF8:
172
 * @out:  a pointer to an array of bytes to store the result
173
 * @outlen:  the length of @out
174
 * @in:  a pointer to an array of ASCII chars
175
 * @inlen:  the length of @in
176
 *
177
 * Take a block of ASCII chars in and try to convert it to an UTF-8
178
 * block of chars out.
179
 * Returns the number of bytes written if success, or -1 otherwise
180
 * The value of @inlen after return is the number of octets consumed
181
 *     if the return value is positive, else unpredictable.
182
 * The value of @outlen after return is the number of octets produced.
183
 */
184
static int
185
asciiToUTF8(unsigned char* out, int *outlen,
186
1.93k
              const unsigned char* in, int *inlen) {
187
1.93k
    unsigned char* outstart = out;
188
1.93k
    const unsigned char* base = in;
189
1.93k
    const unsigned char* processed = in;
190
1.93k
    unsigned char* outend = out + *outlen;
191
1.93k
    const unsigned char* inend;
192
1.93k
    unsigned int c;
193
194
1.93k
    inend = in + (*inlen);
195
4.07k
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
196
4.06k
  c= *in++;
197
198
4.06k
        if (out >= outend)
199
0
      break;
200
4.06k
        if (c < 0x80) {
201
2.13k
      *out++ = c;
202
2.13k
  } else {
203
1.92k
      *outlen = out - outstart;
204
1.92k
      *inlen = processed - base;
205
1.92k
      return(-1);
206
1.92k
  }
207
208
2.13k
  processed = (const unsigned char*) in;
209
2.13k
    }
210
15
    *outlen = out - outstart;
211
15
    *inlen = processed - base;
212
15
    return(*outlen);
213
1.93k
}
214
215
#ifdef LIBXML_OUTPUT_ENABLED
216
/**
217
 * UTF8Toascii:
218
 * @out:  a pointer to an array of bytes to store the result
219
 * @outlen:  the length of @out
220
 * @in:  a pointer to an array of UTF-8 chars
221
 * @inlen:  the length of @in
222
 *
223
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
224
 * block of chars out.
225
 *
226
 * Returns the number of bytes written if success, -2 if the transcoding fails, or -1 otherwise
227
 * The value of @inlen after return is the number of octets consumed
228
 *     if the return value is positive, else unpredictable.
229
 * The value of @outlen after return is the number of octets produced.
230
 */
231
static int
232
UTF8Toascii(unsigned char* out, int *outlen,
233
0
              const unsigned char* in, int *inlen) {
234
0
    const unsigned char* processed = in;
235
0
    const unsigned char* outend;
236
0
    const unsigned char* outstart = out;
237
0
    const unsigned char* instart = in;
238
0
    const unsigned char* inend;
239
0
    unsigned int c, d;
240
0
    int trailing;
241
242
0
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
243
0
    if (in == NULL) {
244
        /*
245
   * initialization nothing to do
246
   */
247
0
  *outlen = 0;
248
0
  *inlen = 0;
249
0
  return(0);
250
0
    }
251
0
    inend = in + (*inlen);
252
0
    outend = out + (*outlen);
253
0
    while (in < inend) {
254
0
  d = *in++;
255
0
  if      (d < 0x80)  { c= d; trailing= 0; }
256
0
  else if (d < 0xC0) {
257
      /* trailing byte in leading position */
258
0
      *outlen = out - outstart;
259
0
      *inlen = processed - instart;
260
0
      return(-2);
261
0
        } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
262
0
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
263
0
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
264
0
  else {
265
      /* no chance for this in Ascii */
266
0
      *outlen = out - outstart;
267
0
      *inlen = processed - instart;
268
0
      return(-2);
269
0
  }
270
271
0
  if (inend - in < trailing) {
272
0
      break;
273
0
  }
274
275
0
  for ( ; trailing; trailing--) {
276
0
      if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
277
0
    break;
278
0
      c <<= 6;
279
0
      c |= d & 0x3F;
280
0
  }
281
282
  /* assertion: c is a single UCS-4 value */
283
0
  if (c < 0x80) {
284
0
      if (out >= outend)
285
0
    break;
286
0
      *out++ = c;
287
0
  } else {
288
      /* no chance for this in Ascii */
289
0
      *outlen = out - outstart;
290
0
      *inlen = processed - instart;
291
0
      return(-2);
292
0
  }
293
0
  processed = in;
294
0
    }
295
0
    *outlen = out - outstart;
296
0
    *inlen = processed - instart;
297
0
    return(*outlen);
298
0
}
299
#endif /* LIBXML_OUTPUT_ENABLED */
300
301
/**
302
 * isolat1ToUTF8:
303
 * @out:  a pointer to an array of bytes to store the result
304
 * @outlen:  the length of @out
305
 * @in:  a pointer to an array of ISO Latin 1 chars
306
 * @inlen:  the length of @in
307
 *
308
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
309
 * block of chars out.
310
 * Returns the number of bytes written if success, or -1 otherwise
311
 * The value of @inlen after return is the number of octets consumed
312
 *     if the return value is positive, else unpredictable.
313
 * The value of @outlen after return is the number of octets produced.
314
 */
315
int
316
isolat1ToUTF8(unsigned char* out, int *outlen,
317
36
              const unsigned char* in, int *inlen) {
318
36
    unsigned char* outstart = out;
319
36
    const unsigned char* base = in;
320
36
    unsigned char* outend;
321
36
    const unsigned char* inend;
322
36
    const unsigned char* instop;
323
324
36
    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
325
0
  return(-1);
326
327
36
    outend = out + *outlen;
328
36
    inend = in + (*inlen);
329
36
    instop = inend;
330
331
9.73k
    while ((in < inend) && (out < outend - 1)) {
332
9.69k
  if (*in >= 0x80) {
333
9.67k
      *out++ = (((*in) >>  6) & 0x1F) | 0xC0;
334
9.67k
            *out++ = ((*in) & 0x3F) | 0x80;
335
9.67k
      ++in;
336
9.67k
  }
337
9.69k
  if ((instop - in) > (outend - out)) instop = in + (outend - out);
338
18.0k
  while ((in < instop) && (*in < 0x80)) {
339
8.32k
      *out++ = *in++;
340
8.32k
  }
341
9.69k
    }
342
36
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
343
0
        *out++ = *in++;
344
0
    }
345
36
    *outlen = out - outstart;
346
36
    *inlen = in - base;
347
36
    return(*outlen);
348
36
}
349
350
/**
351
 * UTF8ToUTF8:
352
 * @out:  a pointer to an array of bytes to store the result
353
 * @outlen:  the length of @out
354
 * @inb:  a pointer to an array of UTF-8 chars
355
 * @inlenb:  the length of @inb in bytes
356
 *
357
 * No op copy operation for UTF8 handling.
358
 *
359
 * Returns the number of bytes written, or -1 if lack of space.
360
 *     The value of *inlen after return is the number of octets consumed
361
 *     if the return value is positive, else unpredictable.
362
 */
363
static int
364
UTF8ToUTF8(unsigned char* out, int *outlen,
365
           const unsigned char* inb, int *inlenb)
366
0
{
367
0
    int len;
368
369
0
    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
370
0
  return(-1);
371
0
    if (inb == NULL) {
372
        /* inb == NULL means output is initialized. */
373
0
        *outlen = 0;
374
0
        *inlenb = 0;
375
0
        return(0);
376
0
    }
377
0
    if (*outlen > *inlenb) {
378
0
  len = *inlenb;
379
0
    } else {
380
0
  len = *outlen;
381
0
    }
382
0
    if (len < 0)
383
0
  return(-1);
384
385
    /*
386
     * FIXME: Conversion functions must assure valid UTF-8, so we have
387
     * to check for UTF-8 validity. Preferably, this converter shouldn't
388
     * be used at all.
389
     */
390
0
    memcpy(out, inb, len);
391
392
0
    *outlen = len;
393
0
    *inlenb = len;
394
0
    return(*outlen);
395
0
}
396
397
398
#ifdef LIBXML_OUTPUT_ENABLED
399
/**
400
 * UTF8Toisolat1:
401
 * @out:  a pointer to an array of bytes to store the result
402
 * @outlen:  the length of @out
403
 * @in:  a pointer to an array of UTF-8 chars
404
 * @inlen:  the length of @in
405
 *
406
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
407
 * block of chars out.
408
 *
409
 * Returns the number of bytes written if success, -2 if the transcoding fails,
410
 *     or -1 otherwise
411
 * The value of @inlen after return is the number of octets consumed
412
 *     if the return value is positive, else unpredictable.
413
 * The value of @outlen after return is the number of octets produced.
414
 */
415
int
416
UTF8Toisolat1(unsigned char* out, int *outlen,
417
0
              const unsigned char* in, int *inlen) {
418
0
    const unsigned char* processed = in;
419
0
    const unsigned char* outend;
420
0
    const unsigned char* outstart = out;
421
0
    const unsigned char* instart = in;
422
0
    const unsigned char* inend;
423
0
    unsigned int c, d;
424
0
    int trailing;
425
426
0
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
427
0
    if (in == NULL) {
428
        /*
429
   * initialization nothing to do
430
   */
431
0
  *outlen = 0;
432
0
  *inlen = 0;
433
0
  return(0);
434
0
    }
435
0
    inend = in + (*inlen);
436
0
    outend = out + (*outlen);
437
0
    while (in < inend) {
438
0
  d = *in++;
439
0
  if      (d < 0x80)  { c= d; trailing= 0; }
440
0
  else if (d < 0xC0) {
441
      /* trailing byte in leading position */
442
0
      *outlen = out - outstart;
443
0
      *inlen = processed - instart;
444
0
      return(-2);
445
0
        } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
446
0
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
447
0
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
448
0
  else {
449
      /* no chance for this in IsoLat1 */
450
0
      *outlen = out - outstart;
451
0
      *inlen = processed - instart;
452
0
      return(-2);
453
0
  }
454
455
0
  if (inend - in < trailing) {
456
0
      break;
457
0
  }
458
459
0
  for ( ; trailing; trailing--) {
460
0
      if (in >= inend)
461
0
    break;
462
0
      if (((d= *in++) & 0xC0) != 0x80) {
463
0
    *outlen = out - outstart;
464
0
    *inlen = processed - instart;
465
0
    return(-2);
466
0
      }
467
0
      c <<= 6;
468
0
      c |= d & 0x3F;
469
0
  }
470
471
  /* assertion: c is a single UCS-4 value */
472
0
  if (c <= 0xFF) {
473
0
      if (out >= outend)
474
0
    break;
475
0
      *out++ = c;
476
0
  } else {
477
      /* no chance for this in IsoLat1 */
478
0
      *outlen = out - outstart;
479
0
      *inlen = processed - instart;
480
0
      return(-2);
481
0
  }
482
0
  processed = in;
483
0
    }
484
0
    *outlen = out - outstart;
485
0
    *inlen = processed - instart;
486
0
    return(*outlen);
487
0
}
488
#endif /* LIBXML_OUTPUT_ENABLED */
489
490
/**
491
 * UTF16LEToUTF8:
492
 * @out:  a pointer to an array of bytes to store the result
493
 * @outlen:  the length of @out
494
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
495
 * @inlenb:  the length of @inb in bytes
496
 *
497
 * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8
498
 * block of chars out. This function assumes the endian property
499
 * is the same between the native type of this machine and the
500
 * input one.
501
 *
502
 * Returns the number of bytes written, or -1 if lack of space, or -2
503
 *     if the transcoding fails (if *in is not a valid utf16 string)
504
 *     The value of *inlen after return is the number of octets consumed
505
 *     if the return value is positive, else unpredictable.
506
 */
507
static int
508
UTF16LEToUTF8(unsigned char* out, int *outlen,
509
            const unsigned char* inb, int *inlenb)
510
3.40k
{
511
3.40k
    unsigned char* outstart = out;
512
3.40k
    const unsigned char* processed = inb;
513
3.40k
    unsigned char* outend;
514
3.40k
    unsigned short* in = (unsigned short*) inb;
515
3.40k
    unsigned short* inend;
516
3.40k
    unsigned int c, d, inlen;
517
3.40k
    unsigned char *tmp;
518
3.40k
    int bits;
519
520
3.40k
    if (*outlen == 0) {
521
0
        *inlenb = 0;
522
0
        return(0);
523
0
    }
524
3.40k
    outend = out + *outlen;
525
3.40k
    if ((*inlenb % 2) == 1)
526
3.21k
        (*inlenb)--;
527
3.40k
    inlen = *inlenb / 2;
528
3.40k
    inend = in + inlen;
529
886k
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
530
883k
        if (xmlLittleEndian) {
531
883k
      c= *in++;
532
883k
  } else {
533
0
      tmp = (unsigned char *) in;
534
0
      c = *tmp++;
535
0
      c = c | (((unsigned int)*tmp) << 8);
536
0
      in++;
537
0
  }
538
883k
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
539
1.03k
      if (in >= inend) {           /* handle split mutli-byte characters */
540
626
    break;
541
626
      }
542
409
      if (xmlLittleEndian) {
543
409
    d = *in++;
544
409
      } else {
545
0
    tmp = (unsigned char *) in;
546
0
    d = *tmp++;
547
0
    d = d | (((unsigned int)*tmp) << 8);
548
0
    in++;
549
0
      }
550
409
            if ((d & 0xFC00) == 0xDC00) {
551
376
                c &= 0x03FF;
552
376
                c <<= 10;
553
376
                c |= d & 0x03FF;
554
376
                c += 0x10000;
555
376
            }
556
33
            else {
557
33
    *outlen = out - outstart;
558
33
    *inlenb = processed - inb;
559
33
          return(-2);
560
33
      }
561
409
        }
562
563
  /* assertion: c is a single UCS-4 value */
564
882k
        if (out >= outend)
565
0
      break;
566
882k
        if      (c <    0x80) {  *out++=  c;                bits= -6; }
567
877k
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
568
876k
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
569
376
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
570
571
2.63M
        for ( ; bits >= 0; bits-= 6) {
572
1.75M
            if (out >= outend)
573
0
          break;
574
1.75M
            *out++= ((c >> bits) & 0x3F) | 0x80;
575
1.75M
        }
576
882k
  processed = (const unsigned char*) in;
577
882k
    }
578
3.37k
    *outlen = out - outstart;
579
3.37k
    *inlenb = processed - inb;
580
3.37k
    return(*outlen);
581
3.40k
}
582
583
#ifdef LIBXML_OUTPUT_ENABLED
584
/**
585
 * UTF8ToUTF16LE:
586
 * @outb:  a pointer to an array of bytes to store the result
587
 * @outlen:  the length of @outb
588
 * @in:  a pointer to an array of UTF-8 chars
589
 * @inlen:  the length of @in
590
 *
591
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
592
 * block of chars out.
593
 *
594
 * Returns the number of bytes written, or -1 if lack of space, or -2
595
 *     if the transcoding failed.
596
 */
597
static int
598
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
599
            const unsigned char* in, int *inlen)
600
0
{
601
0
    unsigned short* out = (unsigned short*) outb;
602
0
    const unsigned char* processed = in;
603
0
    const unsigned char *const instart = in;
604
0
    unsigned short* outstart= out;
605
0
    unsigned short* outend;
606
0
    const unsigned char* inend;
607
0
    unsigned int c, d;
608
0
    int trailing;
609
0
    unsigned char *tmp;
610
0
    unsigned short tmp1, tmp2;
611
612
    /* UTF16LE encoding has no BOM */
613
0
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
614
0
    if (in == NULL) {
615
0
  *outlen = 0;
616
0
  *inlen = 0;
617
0
  return(0);
618
0
    }
619
0
    inend= in + *inlen;
620
0
    outend = out + (*outlen / 2);
621
0
    while (in < inend) {
622
0
      d= *in++;
623
0
      if      (d < 0x80)  { c= d; trailing= 0; }
624
0
      else if (d < 0xC0) {
625
          /* trailing byte in leading position */
626
0
    *outlen = (out - outstart) * 2;
627
0
    *inlen = processed - instart;
628
0
    return(-2);
629
0
      } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
630
0
      else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
631
0
      else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
632
0
      else {
633
  /* no chance for this in UTF-16 */
634
0
  *outlen = (out - outstart) * 2;
635
0
  *inlen = processed - instart;
636
0
  return(-2);
637
0
      }
638
639
0
      if (inend - in < trailing) {
640
0
          break;
641
0
      }
642
643
0
      for ( ; trailing; trailing--) {
644
0
          if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
645
0
        break;
646
0
          c <<= 6;
647
0
          c |= d & 0x3F;
648
0
      }
649
650
      /* assertion: c is a single UCS-4 value */
651
0
        if (c < 0x10000) {
652
0
            if (out >= outend)
653
0
          break;
654
0
      if (xmlLittleEndian) {
655
0
    *out++ = c;
656
0
      } else {
657
0
    tmp = (unsigned char *) out;
658
0
    *tmp = c ;
659
0
    *(tmp + 1) = c >> 8 ;
660
0
    out++;
661
0
      }
662
0
        }
663
0
        else if (c < 0x110000) {
664
0
            if (out+1 >= outend)
665
0
          break;
666
0
            c -= 0x10000;
667
0
      if (xmlLittleEndian) {
668
0
    *out++ = 0xD800 | (c >> 10);
669
0
    *out++ = 0xDC00 | (c & 0x03FF);
670
0
      } else {
671
0
    tmp1 = 0xD800 | (c >> 10);
672
0
    tmp = (unsigned char *) out;
673
0
    *tmp = (unsigned char) tmp1;
674
0
    *(tmp + 1) = tmp1 >> 8;
675
0
    out++;
676
677
0
    tmp2 = 0xDC00 | (c & 0x03FF);
678
0
    tmp = (unsigned char *) out;
679
0
    *tmp  = (unsigned char) tmp2;
680
0
    *(tmp + 1) = tmp2 >> 8;
681
0
    out++;
682
0
      }
683
0
        }
684
0
        else
685
0
      break;
686
0
  processed = in;
687
0
    }
688
0
    *outlen = (out - outstart) * 2;
689
0
    *inlen = processed - instart;
690
0
    return(*outlen);
691
0
}
692
693
/**
694
 * UTF8ToUTF16:
695
 * @outb:  a pointer to an array of bytes to store the result
696
 * @outlen:  the length of @outb
697
 * @in:  a pointer to an array of UTF-8 chars
698
 * @inlen:  the length of @in
699
 *
700
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
701
 * block of chars out.
702
 *
703
 * Returns the number of bytes written, or -1 if lack of space, or -2
704
 *     if the transcoding failed.
705
 */
706
static int
707
UTF8ToUTF16(unsigned char* outb, int *outlen,
708
            const unsigned char* in, int *inlen)
709
0
{
710
0
    if (in == NULL) {
711
  /*
712
   * initialization, add the Byte Order Mark for UTF-16LE
713
   */
714
0
        if (*outlen >= 2) {
715
0
      outb[0] = 0xFF;
716
0
      outb[1] = 0xFE;
717
0
      *outlen = 2;
718
0
      *inlen = 0;
719
#ifdef DEBUG_ENCODING
720
            xmlGenericError(xmlGenericErrorContext,
721
        "Added FFFE Byte Order Mark\n");
722
#endif
723
0
      return(2);
724
0
  }
725
0
  *outlen = 0;
726
0
  *inlen = 0;
727
0
  return(0);
728
0
    }
729
0
    return (UTF8ToUTF16LE(outb, outlen, in, inlen));
730
0
}
731
#endif /* LIBXML_OUTPUT_ENABLED */
732
733
/**
734
 * UTF16BEToUTF8:
735
 * @out:  a pointer to an array of bytes to store the result
736
 * @outlen:  the length of @out
737
 * @inb:  a pointer to an array of UTF-16 passed as a byte array
738
 * @inlenb:  the length of @inb in bytes
739
 *
740
 * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
741
 * block of chars out. This function assumes the endian property
742
 * is the same between the native type of this machine and the
743
 * input one.
744
 *
745
 * Returns the number of bytes written, or -1 if lack of space, or -2
746
 *     if the transcoding fails (if *in is not a valid utf16 string)
747
 * The value of *inlen after return is the number of octets consumed
748
 *     if the return value is positive, else unpredictable.
749
 */
750
static int
751
UTF16BEToUTF8(unsigned char* out, int *outlen,
752
            const unsigned char* inb, int *inlenb)
753
4.34k
{
754
4.34k
    unsigned char* outstart = out;
755
4.34k
    const unsigned char* processed = inb;
756
4.34k
    unsigned char* outend;
757
4.34k
    unsigned short* in = (unsigned short*) inb;
758
4.34k
    unsigned short* inend;
759
4.34k
    unsigned int c, d, inlen;
760
4.34k
    unsigned char *tmp;
761
4.34k
    int bits;
762
763
4.34k
    if (*outlen == 0) {
764
0
        *inlenb = 0;
765
0
        return(0);
766
0
    }
767
4.34k
    outend = out + *outlen;
768
4.34k
    if ((*inlenb % 2) == 1)
769
3.42k
        (*inlenb)--;
770
4.34k
    inlen = *inlenb / 2;
771
4.34k
    inend= in + inlen;
772
687k
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
773
684k
  if (xmlLittleEndian) {
774
684k
      tmp = (unsigned char *) in;
775
684k
      c = *tmp++;
776
684k
      c = (c << 8) | (unsigned int) *tmp;
777
684k
      in++;
778
684k
  } else {
779
0
      c= *in++;
780
0
  }
781
684k
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
782
919
      if (in >= inend) {           /* handle split mutli-byte characters */
783
638
                break;
784
638
      }
785
281
      if (xmlLittleEndian) {
786
281
    tmp = (unsigned char *) in;
787
281
    d = *tmp++;
788
281
    d = (d << 8) | (unsigned int) *tmp;
789
281
    in++;
790
281
      } else {
791
0
    d= *in++;
792
0
      }
793
281
            if ((d & 0xFC00) == 0xDC00) {
794
245
                c &= 0x03FF;
795
245
                c <<= 10;
796
245
                c |= d & 0x03FF;
797
245
                c += 0x10000;
798
245
            }
799
36
            else {
800
36
    *outlen = out - outstart;
801
36
    *inlenb = processed - inb;
802
36
          return(-2);
803
36
      }
804
281
        }
805
806
  /* assertion: c is a single UCS-4 value */
807
683k
        if (out >= outend)
808
0
      break;
809
683k
        if      (c <    0x80) {  *out++=  c;                bits= -6; }
810
680k
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
811
679k
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
812
245
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
813
814
2.04M
        for ( ; bits >= 0; bits-= 6) {
815
1.35M
            if (out >= outend)
816
0
          break;
817
1.35M
            *out++= ((c >> bits) & 0x3F) | 0x80;
818
1.35M
        }
819
683k
  processed = (const unsigned char*) in;
820
683k
    }
821
4.30k
    *outlen = out - outstart;
822
4.30k
    *inlenb = processed - inb;
823
4.30k
    return(*outlen);
824
4.34k
}
825
826
#ifdef LIBXML_OUTPUT_ENABLED
827
/**
828
 * UTF8ToUTF16BE:
829
 * @outb:  a pointer to an array of bytes to store the result
830
 * @outlen:  the length of @outb
831
 * @in:  a pointer to an array of UTF-8 chars
832
 * @inlen:  the length of @in
833
 *
834
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
835
 * block of chars out.
836
 *
837
 * Returns the number of bytes written, or -1 if lack of space, or -2
838
 *     if the transcoding failed.
839
 */
840
static int
841
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
842
            const unsigned char* in, int *inlen)
843
0
{
844
0
    unsigned short* out = (unsigned short*) outb;
845
0
    const unsigned char* processed = in;
846
0
    const unsigned char *const instart = in;
847
0
    unsigned short* outstart= out;
848
0
    unsigned short* outend;
849
0
    const unsigned char* inend;
850
0
    unsigned int c, d;
851
0
    int trailing;
852
0
    unsigned char *tmp;
853
0
    unsigned short tmp1, tmp2;
854
855
    /* UTF-16BE has no BOM */
856
0
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
857
0
    if (in == NULL) {
858
0
  *outlen = 0;
859
0
  *inlen = 0;
860
0
  return(0);
861
0
    }
862
0
    inend= in + *inlen;
863
0
    outend = out + (*outlen / 2);
864
0
    while (in < inend) {
865
0
      d= *in++;
866
0
      if      (d < 0x80)  { c= d; trailing= 0; }
867
0
      else if (d < 0xC0)  {
868
          /* trailing byte in leading position */
869
0
    *outlen = out - outstart;
870
0
    *inlen = processed - instart;
871
0
    return(-2);
872
0
      } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
873
0
      else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
874
0
      else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
875
0
      else {
876
          /* no chance for this in UTF-16 */
877
0
    *outlen = out - outstart;
878
0
    *inlen = processed - instart;
879
0
    return(-2);
880
0
      }
881
882
0
      if (inend - in < trailing) {
883
0
          break;
884
0
      }
885
886
0
      for ( ; trailing; trailing--) {
887
0
          if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))  break;
888
0
          c <<= 6;
889
0
          c |= d & 0x3F;
890
0
      }
891
892
      /* assertion: c is a single UCS-4 value */
893
0
        if (c < 0x10000) {
894
0
            if (out >= outend)  break;
895
0
      if (xmlLittleEndian) {
896
0
    tmp = (unsigned char *) out;
897
0
    *tmp = c >> 8;
898
0
    *(tmp + 1) = c;
899
0
    out++;
900
0
      } else {
901
0
    *out++ = c;
902
0
      }
903
0
        }
904
0
        else if (c < 0x110000) {
905
0
            if (out+1 >= outend)  break;
906
0
            c -= 0x10000;
907
0
      if (xmlLittleEndian) {
908
0
    tmp1 = 0xD800 | (c >> 10);
909
0
    tmp = (unsigned char *) out;
910
0
    *tmp = tmp1 >> 8;
911
0
    *(tmp + 1) = (unsigned char) tmp1;
912
0
    out++;
913
914
0
    tmp2 = 0xDC00 | (c & 0x03FF);
915
0
    tmp = (unsigned char *) out;
916
0
    *tmp = tmp2 >> 8;
917
0
    *(tmp + 1) = (unsigned char) tmp2;
918
0
    out++;
919
0
      } else {
920
0
    *out++ = 0xD800 | (c >> 10);
921
0
    *out++ = 0xDC00 | (c & 0x03FF);
922
0
      }
923
0
        }
924
0
        else
925
0
      break;
926
0
  processed = in;
927
0
    }
928
0
    *outlen = (out - outstart) * 2;
929
0
    *inlen = processed - instart;
930
0
    return(*outlen);
931
0
}
932
#endif /* LIBXML_OUTPUT_ENABLED */
933
934
/************************************************************************
935
 *                  *
936
 *    Generic encoding handling routines      *
937
 *                  *
938
 ************************************************************************/
939
940
/**
941
 * xmlDetectCharEncoding:
942
 * @in:  a pointer to the first bytes of the XML entity, must be at least
943
 *       2 bytes long (at least 4 if encoding is UTF4 variant).
944
 * @len:  pointer to the length of the buffer
945
 *
946
 * Guess the encoding of the entity using the first bytes of the entity content
947
 * according to the non-normative appendix F of the XML-1.0 recommendation.
948
 *
949
 * Returns one of the XML_CHAR_ENCODING_... values.
950
 */
951
xmlCharEncoding
952
xmlDetectCharEncoding(const unsigned char* in, int len)
953
15.1k
{
954
15.1k
    if (in == NULL)
955
0
        return(XML_CHAR_ENCODING_NONE);
956
15.1k
    if (len >= 4) {
957
15.1k
  if ((in[0] == 0x00) && (in[1] == 0x00) &&
958
15.1k
      (in[2] == 0x00) && (in[3] == 0x3C))
959
25
      return(XML_CHAR_ENCODING_UCS4BE);
960
15.1k
  if ((in[0] == 0x3C) && (in[1] == 0x00) &&
961
15.1k
      (in[2] == 0x00) && (in[3] == 0x00))
962
33
      return(XML_CHAR_ENCODING_UCS4LE);
963
15.0k
  if ((in[0] == 0x00) && (in[1] == 0x00) &&
964
15.0k
      (in[2] == 0x3C) && (in[3] == 0x00))
965
1
      return(XML_CHAR_ENCODING_UCS4_2143);
966
15.0k
  if ((in[0] == 0x00) && (in[1] == 0x3C) &&
967
15.0k
      (in[2] == 0x00) && (in[3] == 0x00))
968
1
      return(XML_CHAR_ENCODING_UCS4_3412);
969
15.0k
  if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
970
15.0k
      (in[2] == 0xA7) && (in[3] == 0x94))
971
18
      return(XML_CHAR_ENCODING_EBCDIC);
972
15.0k
  if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
973
15.0k
      (in[2] == 0x78) && (in[3] == 0x6D))
974
3.08k
      return(XML_CHAR_ENCODING_UTF8);
975
  /*
976
   * Although not part of the recommendation, we also
977
   * attempt an "auto-recognition" of UTF-16LE and
978
   * UTF-16BE encodings.
979
   */
980
11.9k
  if ((in[0] == 0x3C) && (in[1] == 0x00) &&
981
11.9k
      (in[2] == 0x3F) && (in[3] == 0x00))
982
136
      return(XML_CHAR_ENCODING_UTF16LE);
983
11.8k
  if ((in[0] == 0x00) && (in[1] == 0x3C) &&
984
11.8k
      (in[2] == 0x00) && (in[3] == 0x3F))
985
156
      return(XML_CHAR_ENCODING_UTF16BE);
986
11.8k
    }
987
11.6k
    if (len >= 3) {
988
  /*
989
   * Errata on XML-1.0 June 20 2001
990
   * We now allow an UTF8 encoded BOM
991
   */
992
11.6k
  if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
993
11.6k
      (in[2] == 0xBF))
994
630
      return(XML_CHAR_ENCODING_UTF8);
995
11.6k
    }
996
    /* For UTF-16 we can recognize by the BOM */
997
11.0k
    if (len >= 2) {
998
11.0k
  if ((in[0] == 0xFE) && (in[1] == 0xFF))
999
226
      return(XML_CHAR_ENCODING_UTF16BE);
1000
10.8k
  if ((in[0] == 0xFF) && (in[1] == 0xFE))
1001
126
      return(XML_CHAR_ENCODING_UTF16LE);
1002
10.8k
    }
1003
10.7k
    return(XML_CHAR_ENCODING_NONE);
1004
11.0k
}
1005
1006
/**
1007
 * xmlCleanupEncodingAliases:
1008
 *
1009
 * Unregisters all aliases
1010
 */
1011
void
1012
0
xmlCleanupEncodingAliases(void) {
1013
0
    int i;
1014
1015
0
    if (xmlCharEncodingAliases == NULL)
1016
0
  return;
1017
1018
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1019
0
  if (xmlCharEncodingAliases[i].name != NULL)
1020
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1021
0
  if (xmlCharEncodingAliases[i].alias != NULL)
1022
0
      xmlFree((char *) xmlCharEncodingAliases[i].alias);
1023
0
    }
1024
0
    xmlCharEncodingAliasesNb = 0;
1025
0
    xmlCharEncodingAliasesMax = 0;
1026
0
    xmlFree(xmlCharEncodingAliases);
1027
0
    xmlCharEncodingAliases = NULL;
1028
0
}
1029
1030
/**
1031
 * xmlGetEncodingAlias:
1032
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1033
 *
1034
 * Lookup an encoding name for the given alias.
1035
 *
1036
 * Returns NULL if not found, otherwise the original name
1037
 */
1038
const char *
1039
3.34k
xmlGetEncodingAlias(const char *alias) {
1040
3.34k
    int i;
1041
3.34k
    char upper[100];
1042
1043
3.34k
    if (alias == NULL)
1044
0
  return(NULL);
1045
1046
3.34k
    if (xmlCharEncodingAliases == NULL)
1047
3.34k
  return(NULL);
1048
1049
0
    for (i = 0;i < 99;i++) {
1050
0
        upper[i] = toupper(alias[i]);
1051
0
  if (upper[i] == 0) break;
1052
0
    }
1053
0
    upper[i] = 0;
1054
1055
    /*
1056
     * Walk down the list looking for a definition of the alias
1057
     */
1058
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1059
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1060
0
      return(xmlCharEncodingAliases[i].name);
1061
0
  }
1062
0
    }
1063
0
    return(NULL);
1064
0
}
1065
1066
/**
1067
 * xmlAddEncodingAlias:
1068
 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1069
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1070
 *
1071
 * Registers an alias @alias for an encoding named @name. Existing alias
1072
 * will be overwritten.
1073
 *
1074
 * Returns 0 in case of success, -1 in case of error
1075
 */
1076
int
1077
0
xmlAddEncodingAlias(const char *name, const char *alias) {
1078
0
    int i;
1079
0
    char upper[100];
1080
1081
0
    if ((name == NULL) || (alias == NULL))
1082
0
  return(-1);
1083
1084
0
    for (i = 0;i < 99;i++) {
1085
0
        upper[i] = toupper(alias[i]);
1086
0
  if (upper[i] == 0) break;
1087
0
    }
1088
0
    upper[i] = 0;
1089
1090
0
    if (xmlCharEncodingAliases == NULL) {
1091
0
  xmlCharEncodingAliasesNb = 0;
1092
0
  xmlCharEncodingAliasesMax = 20;
1093
0
  xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1094
0
        xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1095
0
  if (xmlCharEncodingAliases == NULL)
1096
0
      return(-1);
1097
0
    } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1098
0
  xmlCharEncodingAliasesMax *= 2;
1099
0
  xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1100
0
        xmlRealloc(xmlCharEncodingAliases,
1101
0
             xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1102
0
    }
1103
    /*
1104
     * Walk down the list looking for a definition of the alias
1105
     */
1106
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1107
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1108
      /*
1109
       * Replace the definition.
1110
       */
1111
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1112
0
      xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1113
0
      return(0);
1114
0
  }
1115
0
    }
1116
    /*
1117
     * Add the definition
1118
     */
1119
0
    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1120
0
    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1121
0
    xmlCharEncodingAliasesNb++;
1122
0
    return(0);
1123
0
}
1124
1125
/**
1126
 * xmlDelEncodingAlias:
1127
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1128
 *
1129
 * Unregisters an encoding alias @alias
1130
 *
1131
 * Returns 0 in case of success, -1 in case of error
1132
 */
1133
int
1134
0
xmlDelEncodingAlias(const char *alias) {
1135
0
    int i;
1136
1137
0
    if (alias == NULL)
1138
0
  return(-1);
1139
1140
0
    if (xmlCharEncodingAliases == NULL)
1141
0
  return(-1);
1142
    /*
1143
     * Walk down the list looking for a definition of the alias
1144
     */
1145
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1146
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1147
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1148
0
      xmlFree((char *) xmlCharEncodingAliases[i].alias);
1149
0
      xmlCharEncodingAliasesNb--;
1150
0
      memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1151
0
        sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1152
0
      return(0);
1153
0
  }
1154
0
    }
1155
0
    return(-1);
1156
0
}
1157
1158
/**
1159
 * xmlParseCharEncoding:
1160
 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1161
 *
1162
 * Compare the string to the encoding schemes already known. Note
1163
 * that the comparison is case insensitive accordingly to the section
1164
 * [XML] 4.3.3 Character Encoding in Entities.
1165
 *
1166
 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1167
 * if not recognized.
1168
 */
1169
xmlCharEncoding
1170
xmlParseCharEncoding(const char* name)
1171
503
{
1172
503
    const char *alias;
1173
503
    char upper[500];
1174
503
    int i;
1175
1176
503
    if (name == NULL)
1177
0
  return(XML_CHAR_ENCODING_NONE);
1178
1179
    /*
1180
     * Do the alias resolution
1181
     */
1182
503
    alias = xmlGetEncodingAlias(name);
1183
503
    if (alias != NULL)
1184
0
  name = alias;
1185
1186
7.18k
    for (i = 0;i < 499;i++) {
1187
7.18k
        upper[i] = toupper(name[i]);
1188
7.18k
  if (upper[i] == 0) break;
1189
7.18k
    }
1190
503
    upper[i] = 0;
1191
1192
503
    if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1193
503
    if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1194
503
    if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1195
1196
    /*
1197
     * NOTE: if we were able to parse this, the endianness of UTF16 is
1198
     *       already found and in use
1199
     */
1200
503
    if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1201
503
    if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1202
1203
503
    if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1204
501
    if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1205
501
    if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1206
1207
    /*
1208
     * NOTE: if we were able to parse this, the endianness of UCS4 is
1209
     *       already found and in use
1210
     */
1211
501
    if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1212
443
    if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1213
443
    if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1214
1215
1216
443
    if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1217
443
    if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1218
439
    if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1219
1220
439
    if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1221
439
    if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1222
438
    if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1223
1224
438
    if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1225
438
    if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1226
438
    if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1227
438
    if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1228
438
    if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1229
438
    if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1230
438
    if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1231
1232
438
    if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1233
438
    if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1234
438
    if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1235
1236
#ifdef DEBUG_ENCODING
1237
    xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1238
#endif
1239
438
    return(XML_CHAR_ENCODING_ERROR);
1240
438
}
1241
1242
/**
1243
 * xmlGetCharEncodingName:
1244
 * @enc:  the encoding
1245
 *
1246
 * The "canonical" name for XML encoding.
1247
 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1248
 * Section 4.3.3  Character Encoding in Entities
1249
 *
1250
 * Returns the canonical name for the given encoding
1251
 */
1252
1253
const char*
1254
67
xmlGetCharEncodingName(xmlCharEncoding enc) {
1255
67
    switch (enc) {
1256
0
        case XML_CHAR_ENCODING_ERROR:
1257
0
      return(NULL);
1258
0
        case XML_CHAR_ENCODING_NONE:
1259
0
      return(NULL);
1260
0
        case XML_CHAR_ENCODING_UTF8:
1261
0
      return("UTF-8");
1262
0
        case XML_CHAR_ENCODING_UTF16LE:
1263
0
      return("UTF-16");
1264
0
        case XML_CHAR_ENCODING_UTF16BE:
1265
0
      return("UTF-16");
1266
0
        case XML_CHAR_ENCODING_EBCDIC:
1267
0
            return("EBCDIC");
1268
58
        case XML_CHAR_ENCODING_UCS4LE:
1269
58
            return("ISO-10646-UCS-4");
1270
0
        case XML_CHAR_ENCODING_UCS4BE:
1271
0
            return("ISO-10646-UCS-4");
1272
1
        case XML_CHAR_ENCODING_UCS4_2143:
1273
1
            return("ISO-10646-UCS-4");
1274
1
        case XML_CHAR_ENCODING_UCS4_3412:
1275
1
            return("ISO-10646-UCS-4");
1276
2
        case XML_CHAR_ENCODING_UCS2:
1277
2
            return("ISO-10646-UCS-2");
1278
4
        case XML_CHAR_ENCODING_8859_1:
1279
4
      return("ISO-8859-1");
1280
1
        case XML_CHAR_ENCODING_8859_2:
1281
1
      return("ISO-8859-2");
1282
0
        case XML_CHAR_ENCODING_8859_3:
1283
0
      return("ISO-8859-3");
1284
0
        case XML_CHAR_ENCODING_8859_4:
1285
0
      return("ISO-8859-4");
1286
0
        case XML_CHAR_ENCODING_8859_5:
1287
0
      return("ISO-8859-5");
1288
0
        case XML_CHAR_ENCODING_8859_6:
1289
0
      return("ISO-8859-6");
1290
0
        case XML_CHAR_ENCODING_8859_7:
1291
0
      return("ISO-8859-7");
1292
0
        case XML_CHAR_ENCODING_8859_8:
1293
0
      return("ISO-8859-8");
1294
0
        case XML_CHAR_ENCODING_8859_9:
1295
0
      return("ISO-8859-9");
1296
0
        case XML_CHAR_ENCODING_2022_JP:
1297
0
            return("ISO-2022-JP");
1298
0
        case XML_CHAR_ENCODING_SHIFT_JIS:
1299
0
            return("Shift-JIS");
1300
0
        case XML_CHAR_ENCODING_EUC_JP:
1301
0
            return("EUC-JP");
1302
0
  case XML_CHAR_ENCODING_ASCII:
1303
0
      return(NULL);
1304
67
    }
1305
0
    return(NULL);
1306
67
}
1307
1308
/************************************************************************
1309
 *                  *
1310
 *      Char encoding handlers        *
1311
 *                  *
1312
 ************************************************************************/
1313
1314
1315
/* the size should be growable, but it's not a big deal ... */
1316
9
/* Number of slots allocated for the handlers table below. */
#define MAX_ENCODING_HANDLERS 50
1317
/* Table of registered handlers, allocated by xmlInitCharEncodingHandlers(). */
static xmlCharEncodingHandlerPtr *handlers = NULL;
1318
/* Number of entries of the handlers table currently in use. */
static int nbCharEncodingHandler = 0;
1319
1320
/*
1321
 * The default is UTF-8 for XML, that's also the default used for the
1322
 * parser internals, so the default encoding handler is NULL
1323
 */
1324
1325
static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1326
1327
/**
1328
 * xmlNewCharEncodingHandler:
1329
 * @name:  the encoding name, in UTF-8 format (ASCII actually)
1330
 * @input:  the xmlCharEncodingInputFunc to read that encoding
1331
 * @output:  the xmlCharEncodingOutputFunc to write that encoding
1332
 *
1333
 * Create and registers an xmlCharEncodingHandler.
1334
 *
1335
 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1336
 */
1337
xmlCharEncodingHandlerPtr
1338
xmlNewCharEncodingHandler(const char *name,
1339
                          xmlCharEncodingInputFunc input,
1340
8
                          xmlCharEncodingOutputFunc output) {
1341
8
    xmlCharEncodingHandlerPtr handler;
1342
8
    const char *alias;
1343
8
    char upper[500];
1344
8
    int i;
1345
8
    char *up = NULL;
1346
1347
    /*
1348
     * Do the alias resolution
1349
     */
1350
8
    alias = xmlGetEncodingAlias(name);
1351
8
    if (alias != NULL)
1352
0
  name = alias;
1353
1354
    /*
1355
     * Keep only the uppercase version of the encoding.
1356
     */
1357
8
    if (name == NULL) {
1358
0
        xmlEncodingErr(XML_I18N_NO_NAME,
1359
0
           "xmlNewCharEncodingHandler : no name !\n", NULL);
1360
0
  return(NULL);
1361
0
    }
1362
62
    for (i = 0;i < 499;i++) {
1363
62
        upper[i] = toupper(name[i]);
1364
62
  if (upper[i] == 0) break;
1365
62
    }
1366
8
    upper[i] = 0;
1367
8
    up = xmlMemStrdup(upper);
1368
8
    if (up == NULL) {
1369
0
        xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1370
0
  return(NULL);
1371
0
    }
1372
1373
    /*
1374
     * allocate and fill-up an handler block.
1375
     */
1376
8
    handler = (xmlCharEncodingHandlerPtr)
1377
8
              xmlMalloc(sizeof(xmlCharEncodingHandler));
1378
8
    if (handler == NULL) {
1379
0
        xmlFree(up);
1380
0
        xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1381
0
  return(NULL);
1382
0
    }
1383
8
    memset(handler, 0, sizeof(xmlCharEncodingHandler));
1384
8
    handler->input = input;
1385
8
    handler->output = output;
1386
8
    handler->name = up;
1387
1388
8
#ifdef LIBXML_ICONV_ENABLED
1389
8
    handler->iconv_in = NULL;
1390
8
    handler->iconv_out = NULL;
1391
8
#endif
1392
#ifdef LIBXML_ICU_ENABLED
1393
    handler->uconv_in = NULL;
1394
    handler->uconv_out = NULL;
1395
#endif
1396
1397
    /*
1398
     * registers and returns the handler.
1399
     */
1400
8
    xmlRegisterCharEncodingHandler(handler);
1401
#ifdef DEBUG_ENCODING
1402
    xmlGenericError(xmlGenericErrorContext,
1403
      "Registered encoding handler for %s\n", name);
1404
#endif
1405
8
    return(handler);
1406
8
}
1407
1408
/**
1409
 * xmlInitCharEncodingHandlers:
1410
 *
1411
 * DEPRECATED: This function will be made private. Call xmlInitParser to
1412
 * initialize the library.
1413
 *
1414
 * Initialize the char encoding support, it registers the default
1415
 * encoding supported.
1416
 * NOTE: while public, this function usually doesn't need to be called
1417
 *       in normal processing.
1418
 */
1419
void
1420
1
xmlInitCharEncodingHandlers(void) {
1421
1
    unsigned short int tst = 0x1234;
1422
1
    unsigned char *ptr = (unsigned char *) &tst;
1423
1424
1
    if (handlers != NULL) return;
1425
1426
1
    handlers = (xmlCharEncodingHandlerPtr *)
1427
1
        xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1428
1429
1
    if (*ptr == 0x12) xmlLittleEndian = 0;
1430
1
    else if (*ptr == 0x34) xmlLittleEndian = 1;
1431
0
    else {
1432
0
        xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1433
0
                 "Odd problem at endianness detection\n", NULL);
1434
0
    }
1435
1436
1
    if (handlers == NULL) {
1437
0
        xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
1438
0
  return;
1439
0
    }
1440
1
    xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
1441
1
#ifdef LIBXML_OUTPUT_ENABLED
1442
1
    xmlUTF16LEHandler =
1443
1
          xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1444
1
    xmlUTF16BEHandler =
1445
1
          xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
1446
1
    xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
1447
1
    xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1448
1
    xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
1449
1
    xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
1450
1
#ifdef LIBXML_HTML_ENABLED
1451
1
    xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1452
1
#endif
1453
#else
1454
    xmlUTF16LEHandler =
1455
          xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1456
    xmlUTF16BEHandler =
1457
          xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
1458
    xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
1459
    xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1460
    xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1461
    xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1462
#endif /* LIBXML_OUTPUT_ENABLED */
1463
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
1464
#ifdef LIBXML_ISO8859X_ENABLED
1465
    xmlRegisterCharEncodingHandlersISO8859x ();
1466
#endif
1467
#endif
1468
1469
1
}
1470
1471
/**
1472
 * xmlCleanupCharEncodingHandlers:
1473
 *
1474
 * DEPRECATED: This function will be made private. Call xmlCleanupParser
1475
 * to free global state but see the warnings there. xmlCleanupParser
1476
 * should be only called once at program exit. In most cases, you don't
1477
 * have call cleanup functions at all.
1478
 *
1479
 * Cleanup the memory allocated for the char encoding support, it
1480
 * unregisters all the encoding handlers and the aliases.
1481
 */
1482
void
1483
0
xmlCleanupCharEncodingHandlers(void) {
1484
0
    xmlCleanupEncodingAliases();
1485
1486
0
    if (handlers == NULL) return;
1487
1488
0
    for (;nbCharEncodingHandler > 0;) {
1489
0
        nbCharEncodingHandler--;
1490
0
  if (handlers[nbCharEncodingHandler] != NULL) {
1491
0
      if (handlers[nbCharEncodingHandler]->name != NULL)
1492
0
    xmlFree(handlers[nbCharEncodingHandler]->name);
1493
0
      xmlFree(handlers[nbCharEncodingHandler]);
1494
0
  }
1495
0
    }
1496
0
    xmlFree(handlers);
1497
0
    handlers = NULL;
1498
0
    nbCharEncodingHandler = 0;
1499
0
    xmlDefaultCharEncodingHandler = NULL;
1500
0
}
1501
1502
/**
1503
 * xmlRegisterCharEncodingHandler:
1504
 * @handler:  the xmlCharEncodingHandlerPtr handler block
1505
 *
1506
 * Register the char encoding handler, surprising, isn't it ?
1507
 */
1508
void
1509
8
xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1510
8
    if (handlers == NULL) xmlInitCharEncodingHandlers();
1511
8
    if ((handler == NULL) || (handlers == NULL)) {
1512
0
        xmlEncodingErr(XML_I18N_NO_HANDLER,
1513
0
    "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
1514
0
        goto free_handler;
1515
0
    }
1516
1517
8
    if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1518
0
        xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1519
0
  "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1520
0
                 "MAX_ENCODING_HANDLERS");
1521
0
        goto free_handler;
1522
0
    }
1523
8
    handlers[nbCharEncodingHandler++] = handler;
1524
8
    return;
1525
1526
0
free_handler:
1527
0
    if (handler != NULL) {
1528
0
        if (handler->name != NULL) {
1529
0
            xmlFree(handler->name);
1530
0
        }
1531
0
        xmlFree(handler);
1532
0
    }
1533
0
}
1534
1535
/**
1536
 * xmlGetCharEncodingHandler:
1537
 * @enc:  an xmlCharEncoding value.
1538
 *
1539
 * Search in the registered set the handler able to read/write that encoding.
1540
 *
1541
 * Returns the handler or NULL if not found
1542
 */
1543
xmlCharEncodingHandlerPtr
1544
15.9k
xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1545
15.9k
    xmlCharEncodingHandlerPtr handler;
1546
1547
15.9k
    if (handlers == NULL) xmlInitCharEncodingHandlers();
1548
15.9k
    switch (enc) {
1549
0
        case XML_CHAR_ENCODING_ERROR:
1550
0
      return(NULL);
1551
15.2k
        case XML_CHAR_ENCODING_NONE:
1552
15.2k
      return(NULL);
1553
0
        case XML_CHAR_ENCODING_UTF8:
1554
0
      return(NULL);
1555
262
        case XML_CHAR_ENCODING_UTF16LE:
1556
262
      return(xmlUTF16LEHandler);
1557
382
        case XML_CHAR_ENCODING_UTF16BE:
1558
382
      return(xmlUTF16BEHandler);
1559
18
        case XML_CHAR_ENCODING_EBCDIC:
1560
18
            handler = xmlFindCharEncodingHandler("EBCDIC");
1561
18
            if (handler != NULL) return(handler);
1562
18
            handler = xmlFindCharEncodingHandler("ebcdic");
1563
18
            if (handler != NULL) return(handler);
1564
18
            handler = xmlFindCharEncodingHandler("EBCDIC-US");
1565
18
            if (handler != NULL) return(handler);
1566
0
            handler = xmlFindCharEncodingHandler("IBM-037");
1567
0
            if (handler != NULL) return(handler);
1568
0
      break;
1569
25
        case XML_CHAR_ENCODING_UCS4BE:
1570
25
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1571
25
            if (handler != NULL) return(handler);
1572
25
            handler = xmlFindCharEncodingHandler("UCS-4");
1573
25
            if (handler != NULL) return(handler);
1574
0
            handler = xmlFindCharEncodingHandler("UCS4");
1575
0
            if (handler != NULL) return(handler);
1576
0
      break;
1577
33
        case XML_CHAR_ENCODING_UCS4LE:
1578
33
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1579
33
            if (handler != NULL) return(handler);
1580
33
            handler = xmlFindCharEncodingHandler("UCS-4");
1581
33
            if (handler != NULL) return(handler);
1582
0
            handler = xmlFindCharEncodingHandler("UCS4");
1583
0
            if (handler != NULL) return(handler);
1584
0
      break;
1585
1
        case XML_CHAR_ENCODING_UCS4_2143:
1586
1
      break;
1587
1
        case XML_CHAR_ENCODING_UCS4_3412:
1588
1
      break;
1589
0
        case XML_CHAR_ENCODING_UCS2:
1590
0
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1591
0
            if (handler != NULL) return(handler);
1592
0
            handler = xmlFindCharEncodingHandler("UCS-2");
1593
0
            if (handler != NULL) return(handler);
1594
0
            handler = xmlFindCharEncodingHandler("UCS2");
1595
0
            if (handler != NULL) return(handler);
1596
0
      break;
1597
1598
      /*
1599
       * We used to keep ISO Latin encodings native in the
1600
       * generated data. This led to so many problems that
1601
       * this has been removed. One can still change this
1602
       * back by registering no-ops encoders for those
1603
       */
1604
0
        case XML_CHAR_ENCODING_8859_1:
1605
0
      handler = xmlFindCharEncodingHandler("ISO-8859-1");
1606
0
      if (handler != NULL) return(handler);
1607
0
      break;
1608
0
        case XML_CHAR_ENCODING_8859_2:
1609
0
      handler = xmlFindCharEncodingHandler("ISO-8859-2");
1610
0
      if (handler != NULL) return(handler);
1611
0
      break;
1612
0
        case XML_CHAR_ENCODING_8859_3:
1613
0
      handler = xmlFindCharEncodingHandler("ISO-8859-3");
1614
0
      if (handler != NULL) return(handler);
1615
0
      break;
1616
0
        case XML_CHAR_ENCODING_8859_4:
1617
0
      handler = xmlFindCharEncodingHandler("ISO-8859-4");
1618
0
      if (handler != NULL) return(handler);
1619
0
      break;
1620
0
        case XML_CHAR_ENCODING_8859_5:
1621
0
      handler = xmlFindCharEncodingHandler("ISO-8859-5");
1622
0
      if (handler != NULL) return(handler);
1623
0
      break;
1624
0
        case XML_CHAR_ENCODING_8859_6:
1625
0
      handler = xmlFindCharEncodingHandler("ISO-8859-6");
1626
0
      if (handler != NULL) return(handler);
1627
0
      break;
1628
0
        case XML_CHAR_ENCODING_8859_7:
1629
0
      handler = xmlFindCharEncodingHandler("ISO-8859-7");
1630
0
      if (handler != NULL) return(handler);
1631
0
      break;
1632
0
        case XML_CHAR_ENCODING_8859_8:
1633
0
      handler = xmlFindCharEncodingHandler("ISO-8859-8");
1634
0
      if (handler != NULL) return(handler);
1635
0
      break;
1636
0
        case XML_CHAR_ENCODING_8859_9:
1637
0
      handler = xmlFindCharEncodingHandler("ISO-8859-9");
1638
0
      if (handler != NULL) return(handler);
1639
0
      break;
1640
1641
1642
0
        case XML_CHAR_ENCODING_2022_JP:
1643
0
            handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1644
0
            if (handler != NULL) return(handler);
1645
0
      break;
1646
0
        case XML_CHAR_ENCODING_SHIFT_JIS:
1647
0
            handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1648
0
            if (handler != NULL) return(handler);
1649
0
            handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1650
0
            if (handler != NULL) return(handler);
1651
0
            handler = xmlFindCharEncodingHandler("Shift_JIS");
1652
0
            if (handler != NULL) return(handler);
1653
0
      break;
1654
0
        case XML_CHAR_ENCODING_EUC_JP:
1655
0
            handler = xmlFindCharEncodingHandler("EUC-JP");
1656
0
            if (handler != NULL) return(handler);
1657
0
      break;
1658
0
  default:
1659
0
      break;
1660
15.9k
    }
1661
1662
#ifdef DEBUG_ENCODING
1663
    xmlGenericError(xmlGenericErrorContext,
1664
      "No handler found for encoding %d\n", enc);
1665
#endif
1666
2
    return(NULL);
1667
15.9k
}
1668
1669
/**
1670
 * xmlFindCharEncodingHandler:
1671
 * @name:  a string describing the char encoding.
1672
 *
1673
 * Search in the registered set the handler able to read/write that encoding.
1674
 *
1675
 * Returns the handler or NULL if not found
1676
 */
1677
xmlCharEncodingHandlerPtr
1678
2.83k
xmlFindCharEncodingHandler(const char *name) {
1679
2.83k
    const char *nalias;
1680
2.83k
    const char *norig;
1681
2.83k
    xmlCharEncoding alias;
1682
2.83k
#ifdef LIBXML_ICONV_ENABLED
1683
2.83k
    xmlCharEncodingHandlerPtr enc;
1684
2.83k
    iconv_t icv_in, icv_out;
1685
2.83k
#endif /* LIBXML_ICONV_ENABLED */
1686
#ifdef LIBXML_ICU_ENABLED
1687
    xmlCharEncodingHandlerPtr encu;
1688
    uconv_t *ucv_in, *ucv_out;
1689
#endif /* LIBXML_ICU_ENABLED */
1690
2.83k
    char upper[100];
1691
2.83k
    int i;
1692
1693
2.83k
    if (handlers == NULL) xmlInitCharEncodingHandlers();
1694
2.83k
    if (name == NULL) return(xmlDefaultCharEncodingHandler);
1695
2.83k
    if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1696
1697
    /*
1698
     * Do the alias resolution
1699
     */
1700
2.83k
    norig = name;
1701
2.83k
    nalias = xmlGetEncodingAlias(name);
1702
2.83k
    if (nalias != NULL)
1703
0
  name = nalias;
1704
1705
    /*
1706
     * Check first for directly registered encoding names
1707
     */
1708
21.6k
    for (i = 0;i < 99;i++) {
1709
21.6k
        upper[i] = toupper(name[i]);
1710
21.6k
  if (upper[i] == 0) break;
1711
21.6k
    }
1712
2.83k
    upper[i] = 0;
1713
1714
2.83k
    if (handlers != NULL) {
1715
25.2k
        for (i = 0;i < nbCharEncodingHandler; i++) {
1716
22.4k
            if (!strcmp(upper, handlers[i]->name)) {
1717
#ifdef DEBUG_ENCODING
1718
                xmlGenericError(xmlGenericErrorContext,
1719
                        "Found registered handler for encoding %s\n", name);
1720
#endif
1721
73
                return(handlers[i]);
1722
73
            }
1723
22.4k
        }
1724
2.83k
    }
1725
1726
2.75k
#ifdef LIBXML_ICONV_ENABLED
1727
    /* check whether iconv can handle this */
1728
2.75k
    icv_in = iconv_open("UTF-8", name);
1729
2.75k
    icv_out = iconv_open(name, "UTF-8");
1730
2.75k
    if (icv_in == (iconv_t) -1) {
1731
503
        icv_in = iconv_open("UTF-8", upper);
1732
503
    }
1733
2.75k
    if (icv_out == (iconv_t) -1) {
1734
503
  icv_out = iconv_open(upper, "UTF-8");
1735
503
    }
1736
2.75k
    if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1737
2.25k
      enc = (xmlCharEncodingHandlerPtr)
1738
2.25k
            xmlMalloc(sizeof(xmlCharEncodingHandler));
1739
2.25k
      if (enc == NULL) {
1740
0
          iconv_close(icv_in);
1741
0
          iconv_close(icv_out);
1742
0
    return(NULL);
1743
0
      }
1744
2.25k
            memset(enc, 0, sizeof(xmlCharEncodingHandler));
1745
2.25k
      enc->name = xmlMemStrdup(name);
1746
2.25k
      enc->input = NULL;
1747
2.25k
      enc->output = NULL;
1748
2.25k
      enc->iconv_in = icv_in;
1749
2.25k
      enc->iconv_out = icv_out;
1750
#ifdef DEBUG_ENCODING
1751
            xmlGenericError(xmlGenericErrorContext,
1752
        "Found iconv handler for encoding %s\n", name);
1753
#endif
1754
2.25k
      return enc;
1755
2.25k
    } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1756
0
      xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1757
0
        "iconv : problems with filters for '%s'\n", name);
1758
0
      if (icv_in != (iconv_t) -1)
1759
0
    iconv_close(icv_in);
1760
0
      else
1761
0
    iconv_close(icv_out);
1762
0
    }
1763
503
#endif /* LIBXML_ICONV_ENABLED */
1764
#ifdef LIBXML_ICU_ENABLED
1765
    /* check whether icu can handle this */
1766
    ucv_in = openIcuConverter(name, 1);
1767
    ucv_out = openIcuConverter(name, 0);
1768
    if (ucv_in != NULL && ucv_out != NULL) {
1769
      encu = (xmlCharEncodingHandlerPtr)
1770
             xmlMalloc(sizeof(xmlCharEncodingHandler));
1771
      if (encu == NULL) {
1772
                closeIcuConverter(ucv_in);
1773
                closeIcuConverter(ucv_out);
1774
    return(NULL);
1775
      }
1776
            memset(encu, 0, sizeof(xmlCharEncodingHandler));
1777
      encu->name = xmlMemStrdup(name);
1778
      encu->input = NULL;
1779
      encu->output = NULL;
1780
      encu->uconv_in = ucv_in;
1781
      encu->uconv_out = ucv_out;
1782
#ifdef DEBUG_ENCODING
1783
            xmlGenericError(xmlGenericErrorContext,
1784
        "Found ICU converter handler for encoding %s\n", name);
1785
#endif
1786
      return encu;
1787
    } else if (ucv_in != NULL || ucv_out != NULL) {
1788
            closeIcuConverter(ucv_in);
1789
            closeIcuConverter(ucv_out);
1790
      xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1791
        "ICU converter : problems with filters for '%s'\n", name);
1792
    }
1793
#endif /* LIBXML_ICU_ENABLED */
1794
1795
#ifdef DEBUG_ENCODING
1796
    xmlGenericError(xmlGenericErrorContext,
1797
      "No handler found for encoding %s\n", name);
1798
#endif
1799
1800
    /*
1801
     * Fallback using the canonical names
1802
     */
1803
503
    alias = xmlParseCharEncoding(norig);
1804
503
    if (alias != XML_CHAR_ENCODING_ERROR) {
1805
65
        const char* canon;
1806
65
        canon = xmlGetCharEncodingName(alias);
1807
65
        if ((canon != NULL) && (strcmp(name, canon))) {
1808
6
      return(xmlFindCharEncodingHandler(canon));
1809
6
        }
1810
65
    }
1811
1812
    /* If "none of the above", give up */
1813
497
    return(NULL);
1814
503
}
1815
1816
/************************************************************************
1817
 *                  *
1818
 *    ICONV based generic conversion functions    *
1819
 *                  *
1820
 ************************************************************************/
1821
1822
#ifdef LIBXML_ICONV_ENABLED
1823
/**
 * xmlIconvWrapper:
 * @cd:   iconv converter data structure
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of input bytes
 * @inlen:  the length of @in
 *
 * Performs a single iconv() call converting @in into @out.
 *
 * Returns 0 if success, or
 *     -1 by lack of space (or if one of the arguments is NULL, in which
 *        case @outlen, when available, is set to 0), or
 *     -2 if the transcoding fails (the input is not valid in the source
 *        encoding or can't be represented in the target encoding), or
 *     -3 if the input ends with an incomplete sequence, or if iconv
 *        failed for any other reason.
 *
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    size_t icv_inlen, icv_outlen;
    const char *icv_in = (const char *) in;
    char *icv_out = (char *) out;
    size_t ret;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
        if (outlen != NULL) *outlen = 0;
        return(-1);
    }
    icv_inlen = *inlen;
    icv_outlen = *outlen;
    /*
     * Some versions take const, other versions take non-const input.
     */
    ret = iconv(cd, (void *) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
    /* iconv leaves the remaining byte counts; turn them into used counts */
    *inlen -= icv_inlen;
    *outlen -= icv_outlen;

    if (ret == (size_t) -1) {
        /*
         * errno is only meaningful when iconv reported a failure, so it
         * is consulted in this branch exclusively.
         */
#ifdef EILSEQ
        if (errno == EILSEQ)
            return -2;
#endif
#ifdef E2BIG
        if (errno == E2BIG)
            return -1;
#endif
        /* EINVAL (truncated input) and any other error */
        return -3;
    }

    /* no error reported but input left over: treat as partial input */
    if (icv_inlen != 0)
        return -3;

    return 0;
}
1883
#endif /* LIBXML_ICONV_ENABLED */
1884
1885
/************************************************************************
1886
 *                  *
1887
 *    ICU based generic conversion functions    *
1888
 *                  *
1889
 ************************************************************************/
1890
1891
#ifdef LIBXML_ICU_ENABLED
1892
/**
1893
 * xmlUconvWrapper:
1894
 * @cd: ICU uconverter data structure
1895
 * @toUnicode : non-zero if toUnicode. 0 otherwise.
1896
 * @out:  a pointer to an array of bytes to store the result
1897
 * @outlen:  the length of @out
1898
 * @in:  a pointer to an array of input bytes
1899
 * @inlen:  the length of @in
1900
 * @flush: if true, indicates end of input
1901
 *
1902
 * Returns 0 if success, or
1903
 *     -1 by lack of space, or
1904
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
1905
 *        the result of transformation can't fit into the encoding we want), or
1906
 *     -3 if there the last byte can't form a single output char.
1907
 *
1908
 * The value of @inlen after return is the number of octets consumed
1909
 *     as the return value is positive, else unpredictable.
1910
 * The value of @outlen after return is the number of octets produced.
1911
 */
1912
static int
1913
xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
1914
                const unsigned char *in, int *inlen, int flush) {
1915
    const char *ucv_in = (const char *) in;
1916
    char *ucv_out = (char *) out;
1917
    UErrorCode err = U_ZERO_ERROR;
1918
1919
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1920
        if (outlen != NULL) *outlen = 0;
1921
        return(-1);
1922
    }
1923
1924
    if (toUnicode) {
1925
        /* encoding => UTF-16 => UTF-8 */
1926
        ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
1927
                       &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1928
                       &cd->pivot_source, &cd->pivot_target,
1929
                       cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1930
    } else {
1931
        /* UTF-8 => UTF-16 => encoding */
1932
        ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
1933
                       &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1934
                       &cd->pivot_source, &cd->pivot_target,
1935
                       cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1936
    }
1937
    *inlen = ucv_in - (const char*) in;
1938
    *outlen = ucv_out - (char *) out;
1939
    if (U_SUCCESS(err)) {
1940
        /* reset pivot buf if this is the last call for input (flush==TRUE) */
1941
        if (flush)
1942
            cd->pivot_source = cd->pivot_target = cd->pivot_buf;
1943
        return 0;
1944
    }
1945
    if (err == U_BUFFER_OVERFLOW_ERROR)
1946
        return -1;
1947
    if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
1948
        return -2;
1949
    return -3;
1950
}
1951
#endif /* LIBXML_ICU_ENABLED */
1952
1953
/************************************************************************
1954
 *                  *
1955
 *    The real API used by libxml for on-the-fly conversion *
1956
 *                  *
1957
 ************************************************************************/
1958
1959
/**
1960
 * xmlEncInputChunk:
1961
 * @handler:  encoding handler
1962
 * @out:  a pointer to an array of bytes to store the result
1963
 * @outlen:  the length of @out
1964
 * @in:  a pointer to an array of input bytes
1965
 * @inlen:  the length of @in
1966
 * @flush:  flush (ICU-related)
1967
 *
1968
 * Returns 0 if success, or
1969
 *     -1 by lack of space, or
1970
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
1971
 *        the result of transformation can't fit into the encoding we want), or
1972
 *     -3 if there the last byte can't form a single output char.
1973
 *
1974
 * The value of @inlen after return is the number of octets consumed
1975
 *     as the return value is 0, else unpredictable.
1976
 * The value of @outlen after return is the number of octets produced.
1977
 */
1978
static int
1979
xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
1980
16.6k
                 int *outlen, const unsigned char *in, int *inlen, int flush) {
1981
16.6k
    int ret;
1982
16.6k
    (void)flush;
1983
1984
16.6k
    if (handler->input != NULL) {
1985
9.71k
        ret = handler->input(out, outlen, in, inlen);
1986
9.71k
        if (ret > 0)
1987
783
           ret = 0;
1988
9.71k
    }
1989
6.96k
#ifdef LIBXML_ICONV_ENABLED
1990
6.96k
    else if (handler->iconv_in != NULL) {
1991
6.96k
        ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
1992
6.96k
    }
1993
1
#endif /* LIBXML_ICONV_ENABLED */
1994
#ifdef LIBXML_ICU_ENABLED
1995
    else if (handler->uconv_in != NULL) {
1996
        ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen,
1997
                              flush);
1998
    }
1999
#endif /* LIBXML_ICU_ENABLED */
2000
1
    else {
2001
1
        *outlen = 0;
2002
1
        *inlen = 0;
2003
1
        ret = -2;
2004
1
    }
2005
2006
16.6k
    return(ret);
2007
16.6k
}
2008
2009
/**
2010
 * xmlEncOutputChunk:
2011
 * @handler:  encoding handler
2012
 * @out:  a pointer to an array of bytes to store the result
2013
 * @outlen:  the length of @out
2014
 * @in:  a pointer to an array of input bytes
2015
 * @inlen:  the length of @in
2016
 *
2017
 * Returns 0 if success, or
2018
 *     -1 by lack of space, or
2019
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2020
 *        the result of transformation can't fit into the encoding we want), or
2021
 *     -3 if there the last byte can't form a single output char.
2022
 *     -4 if no output function was found.
2023
 *
2024
 * The value of @inlen after return is the number of octets consumed
2025
 *     as the return value is 0, else unpredictable.
2026
 * The value of @outlen after return is the number of octets produced.
2027
 */
2028
static int
2029
xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2030
0
                  int *outlen, const unsigned char *in, int *inlen) {
2031
0
    int ret;
2032
2033
0
    if (handler->output != NULL) {
2034
0
        ret = handler->output(out, outlen, in, inlen);
2035
0
        if (ret > 0)
2036
0
           ret = 0;
2037
0
    }
2038
0
#ifdef LIBXML_ICONV_ENABLED
2039
0
    else if (handler->iconv_out != NULL) {
2040
0
        ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
2041
0
    }
2042
0
#endif /* LIBXML_ICONV_ENABLED */
2043
#ifdef LIBXML_ICU_ENABLED
2044
    else if (handler->uconv_out != NULL) {
2045
        ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen,
2046
                              1);
2047
    }
2048
#endif /* LIBXML_ICU_ENABLED */
2049
0
    else {
2050
0
        *outlen = 0;
2051
0
        *inlen = 0;
2052
0
        ret = -4;
2053
0
    }
2054
2055
0
    return(ret);
2056
0
}
2057
2058
/**
2059
 * xmlCharEncFirstLineInt:
2060
 * @handler:  char encoding transformation data structure
2061
 * @out:  an xmlBuffer for the output.
2062
 * @in:  an xmlBuffer for the input
2063
 * @len:  number of bytes to convert for the first line, or -1
2064
 *
2065
 * Front-end for the encoding handler input function, but handle only
2066
 * the very first line, i.e. limit itself to 45 chars.
2067
 *
2068
 * Returns the number of byte written if success, or
2069
 *     -1 general error
2070
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2071
 *        the result of transformation can't fit into the encoding we want), or
2072
 */
2073
int
2074
xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2075
0
                       xmlBufferPtr in, int len) {
2076
0
    int ret;
2077
0
    int written;
2078
0
    int toconv;
2079
2080
0
    if (handler == NULL) return(-1);
2081
0
    if (out == NULL) return(-1);
2082
0
    if (in == NULL) return(-1);
2083
2084
    /* calculate space available */
2085
0
    written = out->size - out->use - 1; /* count '\0' */
2086
0
    toconv = in->use;
2087
    /*
2088
     * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2089
     * 45 chars should be sufficient to reach the end of the encoding
2090
     * declaration without going too far inside the document content.
2091
     * on UTF-16 this means 90bytes, on UCS4 this means 180
2092
     * The actual value depending on guessed encoding is passed as @len
2093
     * if provided
2094
     */
2095
0
    if (len >= 0) {
2096
0
        if (toconv > len)
2097
0
            toconv = len;
2098
0
    } else {
2099
0
        if (toconv > 180)
2100
0
            toconv = 180;
2101
0
    }
2102
0
    if (toconv * 2 >= written) {
2103
0
        xmlBufferGrow(out, toconv * 2);
2104
0
  written = out->size - out->use - 1;
2105
0
    }
2106
2107
0
    ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2108
0
                           in->content, &toconv, 0);
2109
0
    xmlBufferShrink(in, toconv);
2110
0
    out->use += written;
2111
0
    out->content[out->use] = 0;
2112
0
    if (ret == -1) ret = -3;
2113
2114
#ifdef DEBUG_ENCODING
2115
    switch (ret) {
2116
        case 0:
2117
      xmlGenericError(xmlGenericErrorContext,
2118
        "converted %d bytes to %d bytes of input\n",
2119
              toconv, written);
2120
      break;
2121
        case -1:
2122
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2123
              toconv, written, in->use);
2124
      break;
2125
        case -2:
2126
      xmlGenericError(xmlGenericErrorContext,
2127
        "input conversion failed due to input error\n");
2128
      break;
2129
        case -3:
2130
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2131
              toconv, written, in->use);
2132
      break;
2133
  default:
2134
      xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2135
    }
2136
#endif /* DEBUG_ENCODING */
2137
    /*
2138
     * Ignore when input buffer is not on a boundary
2139
     */
2140
0
    if (ret == -3) ret = 0;
2141
0
    if (ret == -1) ret = 0;
2142
0
    return(written ? written : ret);
2143
0
}
2144
2145
/**
2146
 * xmlCharEncFirstLine:
2147
 * @handler:  char encoding transformation data structure
2148
 * @out:  an xmlBuffer for the output.
2149
 * @in:  an xmlBuffer for the input
2150
 *
2151
 * Front-end for the encoding handler input function, but handle only
2152
 * the very first line, i.e. limit itself to 45 chars.
2153
 *
2154
 * Returns the number of byte written if success, or
2155
 *     -1 general error
2156
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2157
 *        the result of transformation can't fit into the encoding we want), or
2158
 */
2159
int
2160
xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2161
0
                 xmlBufferPtr in) {
2162
0
    return(xmlCharEncFirstLineInt(handler, out, in, -1));
2163
0
}
2164
2165
/**
2166
 * xmlCharEncFirstLineInput:
2167
 * @input: a parser input buffer
2168
 * @len:  number of bytes to convert for the first line, or -1
2169
 *
2170
 * Front-end for the encoding handler input function, but handle only
2171
 * the very first line. Point is that this is based on autodetection
2172
 * of the encoding and once that first line is converted we may find
2173
 * out that a different decoder is needed to process the input.
2174
 *
2175
 * Returns the number of byte written if success, or
2176
 *     -1 general error
2177
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2178
 *        the result of transformation can't fit into the encoding we want), or
2179
 */
2180
int
2181
xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
2182
2.97k
{
2183
2.97k
    int ret;
2184
2.97k
    size_t written;
2185
2.97k
    size_t toconv;
2186
2.97k
    int c_in;
2187
2.97k
    int c_out;
2188
2.97k
    xmlBufPtr in;
2189
2.97k
    xmlBufPtr out;
2190
2191
2.97k
    if ((input == NULL) || (input->encoder == NULL) ||
2192
2.97k
        (input->buffer == NULL) || (input->raw == NULL))
2193
0
        return (-1);
2194
2.97k
    out = input->buffer;
2195
2.97k
    in = input->raw;
2196
2197
2.97k
    toconv = xmlBufUse(in);
2198
2.97k
    if (toconv == 0)
2199
0
        return (0);
2200
2.97k
    written = xmlBufAvail(out) - 1; /* count '\0' */
2201
    /*
2202
     * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2203
     * 45 chars should be sufficient to reach the end of the encoding
2204
     * declaration without going too far inside the document content.
2205
     * on UTF-16 this means 90bytes, on UCS4 this means 180
2206
     * The actual value depending on guessed encoding is passed as @len
2207
     * if provided
2208
     */
2209
2.97k
    if (len >= 0) {
2210
720
        if (toconv > (unsigned int) len)
2211
165
            toconv = len;
2212
2.25k
    } else {
2213
2.25k
        if (toconv > 180)
2214
1.49k
            toconv = 180;
2215
2.25k
    }
2216
2.97k
    if (toconv * 2 >= written) {
2217
0
        xmlBufGrow(out, toconv * 2);
2218
0
        written = xmlBufAvail(out) - 1;
2219
0
    }
2220
2.97k
    if (written > 360)
2221
2.97k
        written = 360;
2222
2223
2.97k
    c_in = toconv;
2224
2.97k
    c_out = written;
2225
2.97k
    ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2226
2.97k
                           xmlBufContent(in), &c_in, 0);
2227
2.97k
    xmlBufShrink(in, c_in);
2228
2.97k
    xmlBufAddLen(out, c_out);
2229
2.97k
    if (ret == -1)
2230
1.68k
        ret = -3;
2231
2232
2.97k
    switch (ret) {
2233
1.17k
        case 0:
2234
#ifdef DEBUG_ENCODING
2235
            xmlGenericError(xmlGenericErrorContext,
2236
                            "converted %d bytes to %d bytes of input\n",
2237
                            c_in, c_out);
2238
#endif
2239
1.17k
            break;
2240
0
        case -1:
2241
#ifdef DEBUG_ENCODING
2242
            xmlGenericError(xmlGenericErrorContext,
2243
                         "converted %d bytes to %d bytes of input, %d left\n",
2244
                            c_in, c_out, (int)xmlBufUse(in));
2245
#endif
2246
0
            break;
2247
1.70k
        case -3:
2248
#ifdef DEBUG_ENCODING
2249
            xmlGenericError(xmlGenericErrorContext,
2250
                        "converted %d bytes to %d bytes of input, %d left\n",
2251
                            c_in, c_out, (int)xmlBufUse(in));
2252
#endif
2253
1.70k
            break;
2254
92
        case -2: {
2255
92
            char buf[50];
2256
92
            const xmlChar *content = xmlBufContent(in);
2257
2258
92
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2259
92
         content[0], content[1],
2260
92
         content[2], content[3]);
2261
92
      buf[49] = 0;
2262
92
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2263
92
        "input conversion failed due to input error, bytes %s\n",
2264
92
               buf);
2265
92
        }
2266
2.97k
    }
2267
    /*
2268
     * Ignore when input buffer is not on a boundary
2269
     */
2270
2.97k
    if (ret == -3) ret = 0;
2271
2.97k
    if (ret == -1) ret = 0;
2272
2.97k
    return(c_out ? c_out : ret);
2273
2.97k
}
2274
2275
/**
2276
 * xmlCharEncInput:
2277
 * @input: a parser input buffer
2278
 * @flush: try to flush all the raw buffer
2279
 *
2280
 * Generic front-end for the encoding handler on parser input
2281
 *
2282
 * Returns the number of byte written if success, or
2283
 *     -1 general error
2284
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2285
 *        the result of transformation can't fit into the encoding we want), or
2286
 */
2287
int
2288
xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
2289
76.8k
{
2290
76.8k
    int ret;
2291
76.8k
    size_t written;
2292
76.8k
    size_t toconv;
2293
76.8k
    int c_in;
2294
76.8k
    int c_out;
2295
76.8k
    xmlBufPtr in;
2296
76.8k
    xmlBufPtr out;
2297
2298
76.8k
    if ((input == NULL) || (input->encoder == NULL) ||
2299
76.8k
        (input->buffer == NULL) || (input->raw == NULL))
2300
0
        return (-1);
2301
76.8k
    out = input->buffer;
2302
76.8k
    in = input->raw;
2303
2304
76.8k
    toconv = xmlBufUse(in);
2305
76.8k
    if (toconv == 0)
2306
63.0k
        return (0);
2307
13.7k
    if ((toconv > 64 * 1024) && (flush == 0))
2308
0
        toconv = 64 * 1024;
2309
13.7k
    written = xmlBufAvail(out);
2310
13.7k
    if (written > 0)
2311
13.7k
        written--; /* count '\0' */
2312
13.7k
    if (toconv * 2 >= written) {
2313
914
        xmlBufGrow(out, toconv * 2);
2314
914
        written = xmlBufAvail(out);
2315
914
        if (written > 0)
2316
914
            written--; /* count '\0' */
2317
914
    }
2318
13.7k
    if ((written > 128 * 1024) && (flush == 0))
2319
0
        written = 128 * 1024;
2320
2321
13.7k
    c_in = toconv;
2322
13.7k
    c_out = written;
2323
13.7k
    ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2324
13.7k
                           xmlBufContent(in), &c_in, flush);
2325
13.7k
    xmlBufShrink(in, c_in);
2326
13.7k
    xmlBufAddLen(out, c_out);
2327
13.7k
    if (ret == -1)
2328
4.39k
        ret = -3;
2329
2330
13.7k
    switch (ret) {
2331
8.50k
        case 0:
2332
#ifdef DEBUG_ENCODING
2333
            xmlGenericError(xmlGenericErrorContext,
2334
                            "converted %d bytes to %d bytes of input\n",
2335
                            c_in, c_out);
2336
#endif
2337
8.50k
            break;
2338
0
        case -1:
2339
#ifdef DEBUG_ENCODING
2340
            xmlGenericError(xmlGenericErrorContext,
2341
                         "converted %d bytes to %d bytes of input, %d left\n",
2342
                            c_in, c_out, (int)xmlBufUse(in));
2343
#endif
2344
0
            break;
2345
4.57k
        case -3:
2346
#ifdef DEBUG_ENCODING
2347
            xmlGenericError(xmlGenericErrorContext,
2348
                        "converted %d bytes to %d bytes of input, %d left\n",
2349
                            c_in, c_out, (int)xmlBufUse(in));
2350
#endif
2351
4.57k
            break;
2352
639
        case -2: {
2353
639
            char buf[50];
2354
639
            const xmlChar *content = xmlBufContent(in);
2355
2356
639
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2357
639
         content[0], content[1],
2358
639
         content[2], content[3]);
2359
639
      buf[49] = 0;
2360
639
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2361
639
        "input conversion failed due to input error, bytes %s\n",
2362
639
               buf);
2363
639
        }
2364
13.7k
    }
2365
    /*
2366
     * Ignore when input buffer is not on a boundary
2367
     */
2368
13.7k
    if (ret == -3)
2369
4.57k
        ret = 0;
2370
13.7k
    return (c_out? c_out : ret);
2371
13.7k
}
2372
2373
/**
2374
 * xmlCharEncInFunc:
2375
 * @handler:  char encoding transformation data structure
2376
 * @out:  an xmlBuffer for the output.
2377
 * @in:  an xmlBuffer for the input
2378
 *
2379
 * Generic front-end for the encoding handler input function
2380
 *
2381
 * Returns the number of byte written if success, or
2382
 *     -1 general error
2383
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2384
 *        the result of transformation can't fit into the encoding we want), or
2385
 */
2386
int
2387
xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2388
                 xmlBufferPtr in)
2389
0
{
2390
0
    int ret;
2391
0
    int written;
2392
0
    int toconv;
2393
2394
0
    if (handler == NULL)
2395
0
        return (-1);
2396
0
    if (out == NULL)
2397
0
        return (-1);
2398
0
    if (in == NULL)
2399
0
        return (-1);
2400
2401
0
    toconv = in->use;
2402
0
    if (toconv == 0)
2403
0
        return (0);
2404
0
    written = out->size - out->use -1; /* count '\0' */
2405
0
    if (toconv * 2 >= written) {
2406
0
        xmlBufferGrow(out, out->size + toconv * 2);
2407
0
        written = out->size - out->use - 1;
2408
0
    }
2409
0
    ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2410
0
                           in->content, &toconv, 1);
2411
0
    xmlBufferShrink(in, toconv);
2412
0
    out->use += written;
2413
0
    out->content[out->use] = 0;
2414
0
    if (ret == -1)
2415
0
        ret = -3;
2416
2417
0
    switch (ret) {
2418
0
        case 0:
2419
#ifdef DEBUG_ENCODING
2420
            xmlGenericError(xmlGenericErrorContext,
2421
                            "converted %d bytes to %d bytes of input\n",
2422
                            toconv, written);
2423
#endif
2424
0
            break;
2425
0
        case -1:
2426
#ifdef DEBUG_ENCODING
2427
            xmlGenericError(xmlGenericErrorContext,
2428
                         "converted %d bytes to %d bytes of input, %d left\n",
2429
                            toconv, written, in->use);
2430
#endif
2431
0
            break;
2432
0
        case -3:
2433
#ifdef DEBUG_ENCODING
2434
            xmlGenericError(xmlGenericErrorContext,
2435
                        "converted %d bytes to %d bytes of input, %d left\n",
2436
                            toconv, written, in->use);
2437
#endif
2438
0
            break;
2439
0
        case -2: {
2440
0
            char buf[50];
2441
2442
0
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2443
0
         in->content[0], in->content[1],
2444
0
         in->content[2], in->content[3]);
2445
0
      buf[49] = 0;
2446
0
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2447
0
        "input conversion failed due to input error, bytes %s\n",
2448
0
               buf);
2449
0
        }
2450
0
    }
2451
    /*
2452
     * Ignore when input buffer is not on a boundary
2453
     */
2454
0
    if (ret == -3)
2455
0
        ret = 0;
2456
0
    return (written? written : ret);
2457
0
}
2458
2459
#ifdef LIBXML_OUTPUT_ENABLED
2460
/**
2461
 * xmlCharEncOutput:
2462
 * @output: a parser output buffer
2463
 * @init: is this an initialization call without data
2464
 *
2465
 * Generic front-end for the encoding handler on parser output
2466
 * a first call with @init == 1 has to be made first to initiate the
2467
 * output in case of non-stateless encoding needing to initiate their
2468
 * state or the output (like the BOM in UTF16).
2469
 * In case of UTF8 sequence conversion errors for the given encoder,
2470
 * the content will be automatically remapped to a CharRef sequence.
2471
 *
2472
 * Returns the number of byte written if success, or
2473
 *     -1 general error
2474
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2475
 *        the result of transformation can't fit into the encoding we want), or
2476
 */
2477
int
2478
xmlCharEncOutput(xmlOutputBufferPtr output, int init)
2479
0
{
2480
0
    int ret;
2481
0
    size_t written;
2482
0
    int writtentot = 0;
2483
0
    size_t toconv;
2484
0
    int c_in;
2485
0
    int c_out;
2486
0
    xmlBufPtr in;
2487
0
    xmlBufPtr out;
2488
2489
0
    if ((output == NULL) || (output->encoder == NULL) ||
2490
0
        (output->buffer == NULL) || (output->conv == NULL))
2491
0
        return (-1);
2492
0
    out = output->conv;
2493
0
    in = output->buffer;
2494
2495
0
retry:
2496
2497
0
    written = xmlBufAvail(out);
2498
0
    if (written > 0)
2499
0
        written--; /* count '\0' */
2500
2501
    /*
2502
     * First specific handling of the initialization call
2503
     */
2504
0
    if (init) {
2505
0
        c_in = 0;
2506
0
        c_out = written;
2507
        /* TODO: Check return value. */
2508
0
        xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2509
0
                          NULL, &c_in);
2510
0
        xmlBufAddLen(out, c_out);
2511
#ifdef DEBUG_ENCODING
2512
  xmlGenericError(xmlGenericErrorContext,
2513
    "initialized encoder\n");
2514
#endif
2515
0
        return(c_out);
2516
0
    }
2517
2518
    /*
2519
     * Conversion itself.
2520
     */
2521
0
    toconv = xmlBufUse(in);
2522
0
    if (toconv == 0)
2523
0
        return (writtentot);
2524
0
    if (toconv > 64 * 1024)
2525
0
        toconv = 64 * 1024;
2526
0
    if (toconv * 4 >= written) {
2527
0
        xmlBufGrow(out, toconv * 4);
2528
0
        written = xmlBufAvail(out) - 1;
2529
0
    }
2530
0
    if (written > 256 * 1024)
2531
0
        written = 256 * 1024;
2532
2533
0
    c_in = toconv;
2534
0
    c_out = written;
2535
0
    ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2536
0
                            xmlBufContent(in), &c_in);
2537
0
    xmlBufShrink(in, c_in);
2538
0
    xmlBufAddLen(out, c_out);
2539
0
    writtentot += c_out;
2540
0
    if (ret == -1) {
2541
0
        if (c_out > 0) {
2542
            /* Can be a limitation of iconv or uconv */
2543
0
            goto retry;
2544
0
        }
2545
0
        ret = -3;
2546
0
    }
2547
2548
    /*
2549
     * Attempt to handle error cases
2550
     */
2551
0
    switch (ret) {
2552
0
        case 0:
2553
#ifdef DEBUG_ENCODING
2554
      xmlGenericError(xmlGenericErrorContext,
2555
        "converted %d bytes to %d bytes of output\n",
2556
              c_in, c_out);
2557
#endif
2558
0
      break;
2559
0
        case -1:
2560
#ifdef DEBUG_ENCODING
2561
      xmlGenericError(xmlGenericErrorContext,
2562
        "output conversion failed by lack of space\n");
2563
#endif
2564
0
      break;
2565
0
        case -3:
2566
#ifdef DEBUG_ENCODING
2567
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2568
              c_in, c_out, (int) xmlBufUse(in));
2569
#endif
2570
0
      break;
2571
0
        case -4:
2572
0
            xmlEncodingErr(XML_I18N_NO_OUTPUT,
2573
0
                           "xmlCharEncOutFunc: no output function !\n", NULL);
2574
0
            ret = -1;
2575
0
            break;
2576
0
        case -2: {
2577
0
      xmlChar charref[20];
2578
0
      int len = (int) xmlBufUse(in);
2579
0
            xmlChar *content = xmlBufContent(in);
2580
0
      int cur, charrefLen;
2581
2582
0
      cur = xmlGetUTF8Char(content, &len);
2583
0
      if (cur <= 0)
2584
0
                break;
2585
2586
#ifdef DEBUG_ENCODING
2587
            xmlGenericError(xmlGenericErrorContext,
2588
                    "handling output conversion error\n");
2589
            xmlGenericError(xmlGenericErrorContext,
2590
                    "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2591
                    content[0], content[1],
2592
                    content[2], content[3]);
2593
#endif
2594
            /*
2595
             * Removes the UTF8 sequence, and replace it by a charref
2596
             * and continue the transcoding phase, hoping the error
2597
             * did not mangle the encoder state.
2598
             */
2599
0
            charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2600
0
                             "&#%d;", cur);
2601
0
            xmlBufShrink(in, len);
2602
0
            xmlBufGrow(out, charrefLen * 4);
2603
0
            c_out = xmlBufAvail(out) - 1;
2604
0
            c_in = charrefLen;
2605
0
            ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2606
0
                                    charref, &c_in);
2607
2608
0
      if ((ret < 0) || (c_in != charrefLen)) {
2609
0
    char buf[50];
2610
2611
0
    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2612
0
       content[0], content[1],
2613
0
       content[2], content[3]);
2614
0
    buf[49] = 0;
2615
0
    xmlEncodingErr(XML_I18N_CONV_FAILED,
2616
0
        "output conversion failed due to conv error, bytes %s\n",
2617
0
             buf);
2618
0
    if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE)
2619
0
        content[0] = ' ';
2620
0
                break;
2621
0
      }
2622
2623
0
            xmlBufAddLen(out, c_out);
2624
0
            writtentot += c_out;
2625
0
            goto retry;
2626
0
  }
2627
0
    }
2628
0
    return(writtentot ? writtentot : ret);
2629
0
}
2630
#endif
2631
2632
/**
2633
 * xmlCharEncOutFunc:
2634
 * @handler:  char encoding transformation data structure
2635
 * @out:  an xmlBuffer for the output.
2636
 * @in:  an xmlBuffer for the input
2637
 *
2638
 * Generic front-end for the encoding handler output function
2639
 * a first call with @in == NULL has to be made firs to initiate the
2640
 * output in case of non-stateless encoding needing to initiate their
2641
 * state or the output (like the BOM in UTF16).
2642
 * In case of UTF8 sequence conversion errors for the given encoder,
2643
 * the content will be automatically remapped to a CharRef sequence.
2644
 *
2645
 * Returns the number of byte written if success, or
2646
 *     -1 general error
2647
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2648
 *        the result of transformation can't fit into the encoding we want), or
2649
 */
2650
int
2651
xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2652
0
                  xmlBufferPtr in) {
2653
0
    int ret;
2654
0
    int written;
2655
0
    int writtentot = 0;
2656
0
    int toconv;
2657
2658
0
    if (handler == NULL) return(-1);
2659
0
    if (out == NULL) return(-1);
2660
2661
0
retry:
2662
2663
0
    written = out->size - out->use;
2664
2665
0
    if (written > 0)
2666
0
  written--; /* Gennady: count '/0' */
2667
2668
    /*
2669
     * First specific handling of in = NULL, i.e. the initialization call
2670
     */
2671
0
    if (in == NULL) {
2672
0
        toconv = 0;
2673
        /* TODO: Check return value. */
2674
0
        xmlEncOutputChunk(handler, &out->content[out->use], &written,
2675
0
                          NULL, &toconv);
2676
0
        out->use += written;
2677
0
        out->content[out->use] = 0;
2678
#ifdef DEBUG_ENCODING
2679
  xmlGenericError(xmlGenericErrorContext,
2680
    "initialized encoder\n");
2681
#endif
2682
0
        return(0);
2683
0
    }
2684
2685
    /*
2686
     * Conversion itself.
2687
     */
2688
0
    toconv = in->use;
2689
0
    if (toconv == 0)
2690
0
  return(0);
2691
0
    if (toconv * 4 >= written) {
2692
0
        xmlBufferGrow(out, toconv * 4);
2693
0
  written = out->size - out->use - 1;
2694
0
    }
2695
0
    ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2696
0
                            in->content, &toconv);
2697
0
    xmlBufferShrink(in, toconv);
2698
0
    out->use += written;
2699
0
    writtentot += written;
2700
0
    out->content[out->use] = 0;
2701
0
    if (ret == -1) {
2702
0
        if (written > 0) {
2703
            /* Can be a limitation of iconv or uconv */
2704
0
            goto retry;
2705
0
        }
2706
0
        ret = -3;
2707
0
    }
2708
2709
    /*
2710
     * Attempt to handle error cases
2711
     */
2712
0
    switch (ret) {
2713
0
        case 0:
2714
#ifdef DEBUG_ENCODING
2715
      xmlGenericError(xmlGenericErrorContext,
2716
        "converted %d bytes to %d bytes of output\n",
2717
              toconv, written);
2718
#endif
2719
0
      break;
2720
0
        case -1:
2721
#ifdef DEBUG_ENCODING
2722
      xmlGenericError(xmlGenericErrorContext,
2723
        "output conversion failed by lack of space\n");
2724
#endif
2725
0
      break;
2726
0
        case -3:
2727
#ifdef DEBUG_ENCODING
2728
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2729
              toconv, written, in->use);
2730
#endif
2731
0
      break;
2732
0
        case -4:
2733
0
      xmlEncodingErr(XML_I18N_NO_OUTPUT,
2734
0
               "xmlCharEncOutFunc: no output function !\n", NULL);
2735
0
      ret = -1;
2736
0
            break;
2737
0
        case -2: {
2738
0
      xmlChar charref[20];
2739
0
      int len = in->use;
2740
0
      const xmlChar *utf = (const xmlChar *) in->content;
2741
0
      int cur, charrefLen;
2742
2743
0
      cur = xmlGetUTF8Char(utf, &len);
2744
0
      if (cur <= 0)
2745
0
                break;
2746
2747
#ifdef DEBUG_ENCODING
2748
            xmlGenericError(xmlGenericErrorContext,
2749
                    "handling output conversion error\n");
2750
            xmlGenericError(xmlGenericErrorContext,
2751
                    "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2752
                    in->content[0], in->content[1],
2753
                    in->content[2], in->content[3]);
2754
#endif
2755
            /*
2756
             * Removes the UTF8 sequence, and replace it by a charref
2757
             * and continue the transcoding phase, hoping the error
2758
             * did not mangle the encoder state.
2759
             */
2760
0
            charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2761
0
                             "&#%d;", cur);
2762
0
            xmlBufferShrink(in, len);
2763
0
            xmlBufferGrow(out, charrefLen * 4);
2764
0
      written = out->size - out->use - 1;
2765
0
            toconv = charrefLen;
2766
0
            ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2767
0
                                    charref, &toconv);
2768
2769
0
      if ((ret < 0) || (toconv != charrefLen)) {
2770
0
    char buf[50];
2771
2772
0
    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2773
0
       in->content[0], in->content[1],
2774
0
       in->content[2], in->content[3]);
2775
0
    buf[49] = 0;
2776
0
    xmlEncodingErr(XML_I18N_CONV_FAILED,
2777
0
        "output conversion failed due to conv error, bytes %s\n",
2778
0
             buf);
2779
0
    if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2780
0
        in->content[0] = ' ';
2781
0
          break;
2782
0
      }
2783
2784
0
            out->use += written;
2785
0
            writtentot += written;
2786
0
            out->content[out->use] = 0;
2787
0
            goto retry;
2788
0
  }
2789
0
    }
2790
0
    return(writtentot ? writtentot : ret);
2791
0
}
2792
2793
/**
2794
 * xmlCharEncCloseFunc:
2795
 * @handler:  char encoding transformation data structure
2796
 *
2797
 * Generic front-end for encoding handler close function
2798
 *
2799
 * Returns 0 if success, or -1 in case of error
2800
 */
2801
int
2802
2.97k
xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2803
2.97k
    int ret = 0;
2804
2.97k
    int tofree = 0;
2805
2.97k
    int i, handler_in_list = 0;
2806
2807
    /* Avoid unused variable warning if features are disabled. */
2808
2.97k
    (void) handler_in_list;
2809
2810
2.97k
    if (handler == NULL) return(-1);
2811
2.97k
    if (handler->name == NULL) return(-1);
2812
2.97k
    if (handlers != NULL) {
2813
22.3k
        for (i = 0;i < nbCharEncodingHandler; i++) {
2814
20.1k
            if (handler == handlers[i]) {
2815
717
          handler_in_list = 1;
2816
717
    break;
2817
717
      }
2818
20.1k
  }
2819
2.97k
    }
2820
2.97k
#ifdef LIBXML_ICONV_ENABLED
2821
    /*
2822
     * Iconv handlers can be used only once, free the whole block.
2823
     * and the associated icon resources.
2824
     */
2825
2.97k
    if ((handler_in_list == 0) &&
2826
2.97k
        ((handler->iconv_out != NULL) || (handler->iconv_in != NULL))) {
2827
2.25k
        tofree = 1;
2828
2.25k
  if (handler->iconv_out != NULL) {
2829
2.25k
      if (iconv_close(handler->iconv_out))
2830
0
    ret = -1;
2831
2.25k
      handler->iconv_out = NULL;
2832
2.25k
  }
2833
2.25k
  if (handler->iconv_in != NULL) {
2834
2.25k
      if (iconv_close(handler->iconv_in))
2835
0
    ret = -1;
2836
2.25k
      handler->iconv_in = NULL;
2837
2.25k
  }
2838
2.25k
    }
2839
2.97k
#endif /* LIBXML_ICONV_ENABLED */
2840
#ifdef LIBXML_ICU_ENABLED
2841
    if ((handler_in_list == 0) &&
2842
        ((handler->uconv_out != NULL) || (handler->uconv_in != NULL))) {
2843
        tofree = 1;
2844
  if (handler->uconv_out != NULL) {
2845
      closeIcuConverter(handler->uconv_out);
2846
      handler->uconv_out = NULL;
2847
  }
2848
  if (handler->uconv_in != NULL) {
2849
      closeIcuConverter(handler->uconv_in);
2850
      handler->uconv_in = NULL;
2851
  }
2852
    }
2853
#endif
2854
2.97k
    if (tofree) {
2855
        /* free up only dynamic handlers iconv/uconv */
2856
2.25k
        if (handler->name != NULL)
2857
2.25k
            xmlFree(handler->name);
2858
2.25k
        handler->name = NULL;
2859
2.25k
        xmlFree(handler);
2860
2.25k
    }
2861
#ifdef DEBUG_ENCODING
2862
    if (ret)
2863
        xmlGenericError(xmlGenericErrorContext,
2864
    "failed to close the encoding handler\n");
2865
    else
2866
        xmlGenericError(xmlGenericErrorContext,
2867
    "closed the encoding handler\n");
2868
#endif
2869
2870
2.97k
    return(ret);
2871
2.97k
}
2872
2873
/**
2874
 * xmlByteConsumed:
2875
 * @ctxt: an XML parser context
2876
 *
2877
 * This function provides the current index of the parser relative
2878
 * to the start of the current entity. This function is computed in
2879
 * bytes from the beginning starting at zero and finishing at the
2880
 * size in byte of the file if parsing a file. The function is
2881
 * of constant cost if the input is UTF-8 but can be costly if run
2882
 * on non-UTF-8 input.
2883
 *
2884
 * Returns the index in bytes from the beginning of the entity or -1
2885
 *         in case the index could not be computed.
2886
 */
2887
long
2888
0
xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2889
0
    xmlParserInputPtr in;
2890
2891
0
    if (ctxt == NULL) return(-1);
2892
0
    in = ctxt->input;
2893
0
    if (in == NULL)  return(-1);
2894
0
    if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2895
0
        unsigned int unused = 0;
2896
0
  xmlCharEncodingHandler * handler = in->buf->encoder;
2897
        /*
2898
   * Encoding conversion, compute the number of unused original
2899
   * bytes from the input not consumed and subtract that from
2900
   * the raw consumed value, this is not a cheap operation
2901
   */
2902
0
        if (in->end - in->cur > 0) {
2903
0
      unsigned char convbuf[32000];
2904
0
      const unsigned char *cur = (const unsigned char *)in->cur;
2905
0
      int toconv = in->end - in->cur, written = 32000;
2906
2907
0
      int ret;
2908
2909
0
            do {
2910
0
                toconv = in->end - cur;
2911
0
                written = 32000;
2912
0
                ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2913
0
                                        cur, &toconv);
2914
0
                if (ret < 0) {
2915
0
                    if (written > 0)
2916
0
                        ret = -2;
2917
0
                    else
2918
0
                        return(-1);
2919
0
                }
2920
0
                unused += written;
2921
0
                cur += toconv;
2922
0
            } while (ret == -2);
2923
0
  }
2924
0
  if (in->buf->rawconsumed < unused)
2925
0
      return(-1);
2926
0
  return(in->buf->rawconsumed - unused);
2927
0
    }
2928
0
    return(in->consumed + (in->cur - in->base));
2929
0
}
2930
2931
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2932
#ifdef LIBXML_ISO8859X_ENABLED
2933
2934
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. Every accepted UTF-8 sequence produces exactly one
 * output byte; the conversion never writes more than @outlen bytes.
 * A call with @in == NULL is the initialization call and does nothing.
 *
 * Returns the number of bytes written if success,
 *     -1 on invalid arguments, or if @out is full while input remains,
 *     -2 if the transcoding fails (invalid UTF-8 sequence or character
 *        not available in the target character set),
 *     -3 if @in ends in the middle of a multi-byte sequence.
 * Except for invalid arguments, the value of @inlen after return is the
 * number of octets consumed (only complete, converted sequences count)
 * and the value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              const unsigned char* const xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;
    int ret = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
        * initialization nothing to do
        */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    outend = out + (*outlen);
    inend = in + (*inlen);
    while (in < inend) {
        unsigned char d;

        if (out >= outend) {
            /* no room left for the single byte this sequence would need */
            ret = -1;
            break;
        }
        d = *in++;
        if  (d < 0x80)  {
            *out++ = d;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            break;
        } else if (d < 0xE0) {
            unsigned char c;

            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                ret = -3;
                break;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                ret = -2;
                break;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            /* level 1: block index for the lead byte, level 2: the byte */
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;

            if (inend - in < 2) {
                /* trailing bytes not in input buffer */
                ret = -3;
                break;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                ret = -2;
                break;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                ret = -2;
                break;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            /* three chained lookups: lead nibble, then c1, then c2 */
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                        xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            ret = -2;
            break;
        }
        processed = in;
    }
    /* processed only advances past sequences that were fully converted */
    *outlen = out - outstart;
    *inlen = processed - instart;
    if (ret < 0)
        return(ret);
    return(*outlen);
}
3052
3053
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable:  128 code points, one for each input byte 0x80..0xFF,
 *                 with 0 marking an undefined byte
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. Conversion stops early, without error, when @out
 * does not have enough room left.
 *
 * Returns the number of bytes written if success, or -1 on invalid
 * arguments or if an input byte has no entry in @unicodetable.
 * Except for invalid arguments:
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* instop;
    unsigned int c;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    instop = inend;

    /*
     * A non-ASCII byte expands to at most 3 bytes, so only handle one
     * while at least 3 bytes are free. The comparison is done on the
     * difference so that no pointer before @out is ever formed.
     */
    while ((in < inend) && (outend - out > 2)) {
        if (*in >= 0x80) {
            c = unicodetable [*in - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
            ++in;
        }
        /* Limit the ASCII run to what still fits in the output. */
        if (instop - in > outend - out) instop = in + (outend - out);
        /* Check the bound before dereferencing to stay inside @in. */
        while ((in < instop) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }
    /* Up to two ASCII bytes can still go into the reserved tail space. */
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ =  *in++;
    }
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ =  *in++;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (*outlen);
}
3118
3119
3120
/************************************************************************
3121
 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
3122
 ************************************************************************/
3123
3124
/*
 * ISO-8859-2 to Unicode: 128 code points, laid out 8 per row.
 * ISO8859xToUTF8 reads its @unicodetable as unicodetable[byte - 0x80]
 * and treats an entry of 0 as an undefined byte; presumably this table
 * is passed there for ISO-8859-2 (the call site is not in this part of
 * the file).
 */
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
3125
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3126
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3127
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3128
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3129
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
3130
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
3131
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
3132
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
3133
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
3134
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
3135
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
3136
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
3137
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
3138
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
3139
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
3140
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
3141
};
3142
3143
/*
 * Unicode to ISO-8859-2 lookup table: a 48-byte index followed by
 * 6 blocks of 64 bytes. UTF8ToISO8859x reads a table of this layout:
 *   [0..31]   block number for a 2-byte lead (lead & 0x1F)
 *   [32..47]  block number for a 3-byte lead (lead & 0x0F)
 *   [48 + block * 64 + (trail & 0x3F)]  next block number (middle
 *             step of a 3-byte sequence) or the resulting byte
 * where a resulting byte of 0 means "not in character set".
 * Presumably this is the @xlattable used for ISO-8859-2 (the call site
 * is not in this part of the file).
 * The string literal supplies exactly 48 + 6 * 64 bytes, so the array
 * holds no terminating 0.
 */
static const unsigned char xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
3144
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3145
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3146
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3147
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3148
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3149
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3150
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3151
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3152
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3153
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3154
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3155
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
3156
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
3157
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3158
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
3159
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3160
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
3161
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3162
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3163
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
3164
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
3165
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
3166
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
3167
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3168
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
3169
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
3170
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
3171
};
3172
3173
/*
 * ISO-8859-3 to Unicode: 128 code points, laid out 8 per row.
 * ISO8859xToUTF8 reads its @unicodetable as unicodetable[byte - 0x80]
 * and treats an entry of 0 as an undefined byte (this table has several
 * such 0x0000 entries); presumably this table is passed there for
 * ISO-8859-3 (the call site is not in this part of the file).
 */
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
3174
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3175
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3176
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3177
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3178
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
3179
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
3180
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
3181
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
3182
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
3183
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3184
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
3185
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
3186
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
3187
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3188
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
3189
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
3190
};
3191
3192
/*
 * Unicode to ISO-8859-3 lookup table: a 48-byte index followed by
 * 7 blocks of 64 bytes. UTF8ToISO8859x reads a table of this layout:
 *   [0..31]   block number for a 2-byte lead (lead & 0x1F)
 *   [32..47]  block number for a 3-byte lead (lead & 0x0F)
 *   [48 + block * 64 + (trail & 0x3F)]  next block number (middle
 *             step of a 3-byte sequence) or the resulting byte
 * where a resulting byte of 0 means "not in character set".
 * Presumably this is the @xlattable used for ISO-8859-3 (the call site
 * is not in this part of the file).
 * The string literal supplies exactly 48 + 7 * 64 bytes, so the array
 * holds no terminating 0.
 */
static const unsigned char xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
3193
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3194
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3195
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3196
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3197
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3198
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3199
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3200
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3201
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3202
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3203
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
3204
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
3205
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
3206
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
3207
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3208
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3209
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
3210
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3211
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3212
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3213
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3214
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3215
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3216
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3217
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
3218
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
3219
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
3220
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3221
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3222
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3223
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
3224
};
3225
3226
/*
 * ISO-8859-4 to Unicode: 128 code points, laid out 8 per row.
 * ISO8859xToUTF8 reads its @unicodetable as unicodetable[byte - 0x80]
 * and treats an entry of 0 as an undefined byte; presumably this table
 * is passed there for ISO-8859-4 (the call site is not in this part of
 * the file).
 */
static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
3227
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3228
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3229
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3230
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3231
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
3232
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
3233
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
3234
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
3235
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3236
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
3237
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3238
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
3239
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3240
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
3241
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3242
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
3243
};
3244
3245
/*
 * Unicode to ISO-8859-4 lookup table: a 48-byte index followed by
 * 6 blocks of 64 bytes. UTF8ToISO8859x reads a table of this layout:
 *   [0..31]   block number for a 2-byte lead (lead & 0x1F)
 *   [32..47]  block number for a 3-byte lead (lead & 0x0F)
 *   [48 + block * 64 + (trail & 0x3F)]  next block number (middle
 *             step of a 3-byte sequence) or the resulting byte
 * where a resulting byte of 0 means "not in character set".
 * Presumably this is the @xlattable used for ISO-8859-4 (the call site
 * is not in this part of the file).
 * The string literal supplies exactly 48 + 6 * 64 bytes, so the array
 * holds no terminating 0.
 */
static const unsigned char xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
3246
    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
3247
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3248
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3249
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3250
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3251
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3252
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3253
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3254
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3255
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
3256
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3257
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3258
    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3259
    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
3260
    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
3261
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
3262
    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
3263
    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
3264
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
3265
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3266
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
3267
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3268
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3269
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3270
    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
3271
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
3272
    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
3273
};
3274
3275
/*
 * ISO-8859-5 to Unicode: 128 code points, laid out 8 per row.
 * ISO8859xToUTF8 reads its @unicodetable as unicodetable[byte - 0x80]
 * and treats an entry of 0 as an undefined byte; presumably this table
 * is passed there for ISO-8859-5 (the call site is not in this part of
 * the file). The 0x2116 entry is the only one here that needs a 3-byte
 * UTF-8 sequence (it is >= 0x800).
 */
static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
3276
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3277
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3278
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3279
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3280
    0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
3281
    0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
3282
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
3283
    0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
3284
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
3285
    0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
3286
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
3287
    0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
3288
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
3289
    0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
3290
    0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
3291
    0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
3292
};
3293
3294
static const unsigned char xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
3295
    "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3296
    "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3297
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3298
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3299
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3300
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3301
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3302
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3303
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3304
    "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
3305
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3306
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
3307
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3308
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3309
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3310
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3311
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
3312
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3313
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3314
    "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3315
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3316
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3317
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3318
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3319
    "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3320
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3321
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3322
};
3323
3324
static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
3325
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3326
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3327
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3328
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3329
    0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
3330
    0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
3331
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3332
    0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
3333
    0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
3334
    0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3335
    0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3336
    0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3337
    0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3338
    0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3339
    0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3340
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3341
};
3342
3343
static const unsigned char xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
3344
    "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3345
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3346
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3347
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3348
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3349
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3350
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3351
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3352
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3353
    "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3354
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3355
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3356
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3357
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3358
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3359
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3360
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3361
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3362
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3363
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3364
    "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3365
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3366
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3367
};
3368
3369
static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
3370
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3371
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3372
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3373
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3374
    0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3375
    0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3376
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3377
    0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3378
    0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3379
    0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3380
    0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3381
    0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3382
    0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3383
    0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3384
    0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3385
    0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3386
};
3387
3388
static const unsigned char xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3389
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3390
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3391
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3392
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3393
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3394
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3395
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3396
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3397
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3398
    "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3399
    "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3400
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3401
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3402
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3403
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3404
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3405
    "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3406
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3407
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3408
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3409
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3410
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3411
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3412
    "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3413
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3414
    "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3415
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3416
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3417
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3418
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3419
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3420
};
3421
3422
static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
3423
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3424
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3425
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3426
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3427
    0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3428
    0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3429
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3430
    0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3431
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3432
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3433
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3434
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3435
    0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3436
    0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3437
    0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3438
    0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3439
};
3440
3441
static const unsigned char xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3442
    "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3443
    "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3444
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3445
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3446
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3447
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3448
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3449
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3450
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3451
    "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3452
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3453
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3454
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3455
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3456
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3457
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3458
    "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3459
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3460
    "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3461
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3462
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3463
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3464
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3465
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3466
    "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3467
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3468
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3469
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3470
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3471
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3472
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3473
};
3474
3475
static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
3476
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3477
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3478
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3479
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3480
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3481
    0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3482
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3483
    0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3484
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3485
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3486
    0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3487
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3488
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3489
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3490
    0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3491
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3492
};
3493
3494
static const unsigned char xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3495
    "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3496
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3497
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3498
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3499
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3500
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3501
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3502
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3503
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3504
    "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3505
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3506
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3507
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3508
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3509
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3510
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3511
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3512
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3513
    "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3514
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3515
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3516
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3517
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3518
};
3519
3520
static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3521
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3522
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3523
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3524
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3525
    0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3526
    0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3527
    0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3528
    0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3529
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3530
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3531
    0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3532
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3533
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3534
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3535
    0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3536
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3537
};
3538
3539
static const unsigned char xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3540
    "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3541
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3542
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3543
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3544
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3545
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3546
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3547
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3548
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3549
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3550
    "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3551
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3552
    "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3553
    "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3554
    "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3555
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3556
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3557
    "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3558
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3559
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3560
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3561
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3562
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3563
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3564
    "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3565
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3566
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3567
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3568
    "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3569
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3570
    "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3571
};
3572
3573
static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3574
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3575
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3576
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3577
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3578
    0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3579
    0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3580
    0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3581
    0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3582
    0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3583
    0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3584
    0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3585
    0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3586
    0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3587
    0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3588
    0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3589
    0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3590
};
3591
3592
static const unsigned char xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3593
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3594
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3595
    "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3596
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3597
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3598
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3599
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3600
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3601
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3602
    "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3603
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3604
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3605
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3606
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3607
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3608
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3609
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3610
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3611
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3612
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3613
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3614
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3615
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3616
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3617
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3618
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3619
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3620
};
3621
3622
static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3623
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3624
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3625
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3626
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3627
    0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3628
    0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3629
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3630
    0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3631
    0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3632
    0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3633
    0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3634
    0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3635
    0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3636
    0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3637
    0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3638
    0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3639
};
3640
3641
static const unsigned char xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3642
    "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3643
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3644
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3645
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3646
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3647
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3648
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3649
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3650
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3651
    "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3652
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3653
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3654
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3655
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3656
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3657
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3658
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3659
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3660
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3661
    "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3662
    "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3663
    "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3664
    "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3665
    "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3666
    "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3667
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3668
    "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3669
    "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3670
    "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3671
    "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3672
    "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3673
};
3674
3675
static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
3676
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3677
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3678
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3679
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3680
    0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3681
    0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3682
    0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3683
    0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3684
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3685
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3686
    0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3687
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3688
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3689
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3690
    0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3691
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
3692
};
3693
3694
static const unsigned char xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
3695
    "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3696
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3697
    "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3698
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3699
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3700
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3701
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3702
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3703
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3704
    "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3705
    "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3706
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3707
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3708
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3709
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3710
    "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3711
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3712
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3713
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3714
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3715
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3716
    "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3717
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3718
    "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3719
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3720
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3721
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3722
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3723
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3724
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3725
    "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3726
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3727
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3728
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3729
    "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3730
    "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3731
    "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3732
    "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3733
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3734
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3735
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3736
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3737
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3738
};
3739
3740
static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
3741
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3742
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3743
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3744
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3745
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3746
    0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3747
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3748
    0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3749
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3750
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3751
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3752
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3753
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3754
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3755
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3756
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
3757
};
3758
3759
/*
 * xmltranscodetable_ISO8859_15:
 * Byte table handed to UTF8ToISO8859x by UTF8ToISO8859_15.  It is sized
 * as 48 leading bytes followed by 6 blocks of 64 bytes; the string
 * literal below supplies exactly that many bytes (27 rows of 16), so no
 * terminating NUL is stored.  The indexing scheme is defined by
 * UTF8ToISO8859x (not shown here); the small values in the leading bytes
 * look like block selectors and the non-zero block bytes like
 * ISO-8859-15 output bytes -- TODO confirm.
 */
static const unsigned char xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
3760
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3761
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3762
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3763
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3764
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3765
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3766
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3767
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3768
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3769
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3770
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3771
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3772
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3773
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3774
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3775
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3776
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3777
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3778
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3779
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3780
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3781
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3782
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3783
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3784
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3785
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3786
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
3787
};
3788
3789
/*
 * xmlunicodetable_ISO8859_16:
 * 128-entry table of 16-bit values handed to ISO8859xToUTF8 by
 * ISO8859_16ToUTF8.  The first 33 entries equal 0x0080 + index, so
 * entry i presumably holds the Unicode code point for input byte
 * 0x80 + i (TODO confirm against ISO8859xToUTF8, which is not shown
 * here).
 */
static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
3790
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3791
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3792
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3793
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3794
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3795
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3796
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3797
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3798
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3799
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3800
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3801
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3802
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3803
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3804
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3805
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
3806
};
3807
3808
/*
 * xmltranscodetable_ISO8859_16:
 * Byte table handed to UTF8ToISO8859x by UTF8ToISO8859_16.  It is sized
 * as 48 leading bytes followed by 9 blocks of 64 bytes; the string
 * literal below supplies exactly that many bytes (39 rows of 16), so no
 * terminating NUL is stored.  The indexing scheme is defined by
 * UTF8ToISO8859x (not shown here); the small values in the leading bytes
 * look like block selectors and the non-zero block bytes like
 * ISO-8859-16 output bytes -- TODO confirm.
 */
static const unsigned char xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
3809
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3810
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3811
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3812
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3813
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3814
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3815
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3816
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3817
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3818
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3819
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3820
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3821
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3822
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3823
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3824
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3825
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3826
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3827
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3828
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3829
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3830
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3831
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3832
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3833
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3834
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3835
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3836
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3837
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3838
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3839
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3840
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3841
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3842
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3843
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3844
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3845
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3846
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3847
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3848
};
3849
3850
3851
/*
3852
 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3853
 */
3854
3855
static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3856
    const unsigned char* in, int *inlen) {
3857
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3858
}
3859
static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3860
    const unsigned char* in, int *inlen) {
3861
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3862
}
3863
3864
static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3865
    const unsigned char* in, int *inlen) {
3866
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3867
}
3868
static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3869
    const unsigned char* in, int *inlen) {
3870
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3871
}
3872
3873
static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3874
    const unsigned char* in, int *inlen) {
3875
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3876
}
3877
static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3878
    const unsigned char* in, int *inlen) {
3879
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3880
}
3881
3882
static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3883
    const unsigned char* in, int *inlen) {
3884
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3885
}
3886
static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3887
    const unsigned char* in, int *inlen) {
3888
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3889
}
3890
3891
static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3892
    const unsigned char* in, int *inlen) {
3893
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3894
}
3895
static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3896
    const unsigned char* in, int *inlen) {
3897
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3898
}
3899
3900
static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3901
    const unsigned char* in, int *inlen) {
3902
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3903
}
3904
static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3905
    const unsigned char* in, int *inlen) {
3906
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3907
}
3908
3909
static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3910
    const unsigned char* in, int *inlen) {
3911
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3912
}
3913
static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3914
    const unsigned char* in, int *inlen) {
3915
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3916
}
3917
3918
static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3919
    const unsigned char* in, int *inlen) {
3920
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3921
}
3922
static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3923
    const unsigned char* in, int *inlen) {
3924
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3925
}
3926
3927
static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3928
    const unsigned char* in, int *inlen) {
3929
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3930
}
3931
static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3932
    const unsigned char* in, int *inlen) {
3933
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3934
}
3935
3936
static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3937
    const unsigned char* in, int *inlen) {
3938
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3939
}
3940
static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3941
    const unsigned char* in, int *inlen) {
3942
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3943
}
3944
3945
static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3946
    const unsigned char* in, int *inlen) {
3947
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3948
}
3949
static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3950
    const unsigned char* in, int *inlen) {
3951
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3952
}
3953
3954
static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3955
    const unsigned char* in, int *inlen) {
3956
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3957
}
3958
static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3959
    const unsigned char* in, int *inlen) {
3960
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3961
}
3962
3963
static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3964
    const unsigned char* in, int *inlen) {
3965
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3966
}
3967
static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3968
    const unsigned char* in, int *inlen) {
3969
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3970
}
3971
3972
static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3973
    const unsigned char* in, int *inlen) {
3974
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3975
}
3976
static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3977
    const unsigned char* in, int *inlen) {
3978
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3979
}
3980
3981
static void
3982
xmlRegisterCharEncodingHandlersISO8859x (void) {
3983
    xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3984
    xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3985
    xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3986
    xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3987
    xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3988
    xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3989
    xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3990
    xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3991
    xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3992
    xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3993
    xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3994
    xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3995
    xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3996
    xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3997
}
3998
3999
#endif
4000
#endif
4001