Coverage Report

Created: 2023-06-07 06:05

/src/libxml2-2.10.3/encoding.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * encoding.c : implements the encoding conversion functions needed for XML
3
 *
4
 * Related specs:
5
 * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6
 * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7
 * [ISO-10646]    UTF-8 and UTF-16 in Annexes
8
 * [ISO-8859-1]   ISO Latin-1 characters codes.
9
 * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
10
 *                Worldwide Character Encoding -- Version 1.0", Addison-
11
 *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
12
 *                described in Unicode Technical Report #4.
13
 * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
14
 *                Information Interchange, ANSI X3.4-1986.
15
 *
16
 * See Copyright for the status of this software.
17
 *
18
 * daniel@veillard.com
19
 *
20
 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
21
 */
22
23
#define IN_LIBXML
24
#include "libxml.h"
25
26
#include <string.h>
27
#include <limits.h>
28
#include <ctype.h>
29
#include <stdlib.h>
30
31
#ifdef LIBXML_ICONV_ENABLED
32
#include <errno.h>
33
#endif
34
35
#include <libxml/encoding.h>
36
#include <libxml/xmlmemory.h>
37
#ifdef LIBXML_HTML_ENABLED
38
#include <libxml/HTMLparser.h>
39
#endif
40
#include <libxml/globals.h>
41
#include <libxml/xmlerror.h>
42
43
#include "buf.h"
44
#include "enc.h"
45
46
#ifdef LIBXML_ICU_ENABLED
47
#include <unicode/ucnv.h>
48
/* Size of pivot buffer, same as icu/source/common/ucnv.cpp CHUNK_SIZE */
49
#define ICU_PIVOT_BUF_SIZE 1024
50
/*
 * Pair of ICU converters plus the pivot buffer shared between them.
 * openIcuConverter() points both pivot cursors at the start of pivot_buf.
 */
typedef struct _uconv_t uconv_t;
51
struct _uconv_t {
52
  UConverter *uconv; /* for conversion between an encoding and UTF-16 */
53
  UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */
54
  /* intermediate storage of ICU_PIVOT_BUF_SIZE UChar units */
  UChar      pivot_buf[ICU_PIVOT_BUF_SIZE];
55
  /* NOTE(review): presumably the read cursor into pivot_buf - confirm */
  UChar      *pivot_source;
56
  /* NOTE(review): presumably the write cursor into pivot_buf - confirm */
  UChar      *pivot_target;
57
};
58
#endif
59
60
/*
 * File-local handler pointers for UTF-16LE and UTF-16BE, NULL at start-up.
 * NOTE(review): they are assigned outside the part of the file visible
 * here - presumably when the default handlers are registered.
 */
static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
61
static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
62
63
/*
 * One encoding alias record holding two strings.
 * NOTE(review): judging by the field names, @alias is the alternative
 * spelling and @name the encoding it stands for - confirm against the
 * alias management functions.
 */
typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
64
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
65
struct _xmlCharEncodingAlias {
66
    const char *name;
67
    const char *alias;
68
};
69
70
/*
 * Alias table state, empty at start-up.
 * NOTE(review): Nb/Max look like "entries in use" and "entries allocated";
 * verify against the code that grows the table.
 */
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
71
static int xmlCharEncodingAliasesNb = 0;
72
static int xmlCharEncodingAliasesMax = 0;
73
74
#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
75
#if 0
76
#define DEBUG_ENCODING  /* Define this to get encoding traces */
77
#endif
78
#else
79
#ifdef LIBXML_ISO8859X_ENABLED
80
static void xmlRegisterCharEncodingHandlersISO8859x (void);
81
#endif
82
#endif
83
84
/*
 * Host byte-order flag: non-zero means native 16-bit values are stored
 * least significant byte first. Defaults to 1.
 * NOTE(review): presumably corrected at initialization time on big-endian
 * hosts - the code doing so is not visible here.
 */
static int xmlLittleEndian = 1;
85
86
/**
87
 * xmlEncodingErrMemory:
88
 * @extra:  extra information, forwarded unchanged to the error layer
89
 *
90
 * Handle an out of memory condition: reports XML_ERR_NO_MEMORY in the
91
 * XML_FROM_I18N domain through __xmlSimpleError().
 */
92
static void
93
xmlEncodingErrMemory(const char *extra)
94
0
{
95
0
    __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
96
0
}
97
98
/**
99
 * xmlEncodingErr:
100
 * @error:  the error number
101
 * @msg:  the error message, used as the format string
 * @val:  a string inserted into @msg and also passed as the first
 *        string argument of the raised error
102
 *
103
 * Raise an encoding error: reports @error in the XML_FROM_I18N domain
104
 * with level XML_ERR_FATAL through __xmlRaiseError().
 */
105
static void LIBXML_ATTR_FORMAT(2,0)
106
xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
107
671
{
108
671
    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
109
671
                    XML_FROM_I18N, error, XML_ERR_FATAL,
110
671
                    NULL, 0, val, NULL, NULL, 0, 0, msg, val);
111
671
}
112
113
#ifdef LIBXML_ICU_ENABLED
114
static uconv_t*
115
openIcuConverter(const char* name, int toUnicode)
116
{
117
  UErrorCode status = U_ZERO_ERROR;
118
  uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
119
  if (conv == NULL)
120
    return NULL;
121
122
  conv->pivot_source = conv->pivot_buf;
123
  conv->pivot_target = conv->pivot_buf;
124
125
  conv->uconv = ucnv_open(name, &status);
126
  if (U_FAILURE(status))
127
    goto error;
128
129
  status = U_ZERO_ERROR;
130
  if (toUnicode) {
131
    ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
132
                        NULL, NULL, NULL, &status);
133
  }
134
  else {
135
    ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
136
                        NULL, NULL, NULL, &status);
137
  }
138
  if (U_FAILURE(status))
139
    goto error;
140
141
  status = U_ZERO_ERROR;
142
  conv->utf8 = ucnv_open("UTF-8", &status);
143
  if (U_SUCCESS(status))
144
    return conv;
145
146
error:
147
  if (conv->uconv)
148
    ucnv_close(conv->uconv);
149
  xmlFree(conv);
150
  return NULL;
151
}
152
153
static void
154
closeIcuConverter(uconv_t *conv)
155
{
156
  if (conv != NULL) {
157
    ucnv_close(conv->uconv);
158
    ucnv_close(conv->utf8);
159
    xmlFree(conv);
160
  }
161
}
162
#endif /* LIBXML_ICU_ENABLED */
163
164
/************************************************************************
165
 *                  *
166
 *    Conversions To/From UTF8 encoding     *
167
 *                  *
168
 ************************************************************************/
169
170
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out. Since ASCII is a subset of UTF-8 this is a checked
 * copy: every input byte produces exactly one output byte.
 *
 * Returns the number of bytes written if success, or -2 if @in contains
 *     a byte outside the ASCII range (transcoding failure, same code as
 *     the other converters in this file).
 * The value of @inlen after return is the number of octets consumed;
 *     on failure it counts the bytes preceding the offending one.
 * The value of @outlen after return is the number of octets produced.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    unsigned char* outend = out + (*outlen);
    const unsigned char* base = in;
    const unsigned char* inend = in + (*inlen);

    /* One output byte per input byte: a single free slot is enough. */
    while ((in < inend) && (out < outend)) {
        if (*in >= 0x80) {
            /* not ASCII: report an input error, not a lack of space */
            *outlen = out - outstart;
            *inlen = in - base;
            return(-2);
        }
        *out++ = *in++;
    }
    *outlen = out - outstart;
    *inlen = in - base;
    return(*outlen);
}
214
215
#ifdef LIBXML_OUTPUT_ENABLED
216
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out. Only single-byte sequences are representable, so
 * any complete multi-byte sequence (valid, malformed or overlong) is a
 * transcoding failure.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails, or -1 if @out, @outlen or @inlen is NULL. A NULL @in is an
 *     initialization call and returns 0.
 * The value of @inlen after return is the number of octets consumed;
 *     a multi-byte sequence cut off by the end of @in is left unconsumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned char* outstart = out;
    unsigned char* outend;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /* initialization, nothing to do */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        unsigned int d = *in;
        int trailing;

        if (d < 0x80) {
            if (out >= outend)
                break;
            *out++ = d;
            in++;
            continue;
        }

        /* 0x80..0xBF: trailing byte in leading position; >= 0xF8: no UTF-8 */
        if ((d >= 0xC0) && (d < 0xF8)) {
            trailing = (d < 0xE0) ? 1 : ((d < 0xF0) ? 2 : 3);
            /* sequence split across chunks: wait for the remaining bytes */
            if (inend - in - 1 < trailing)
                break;
        }

        /* no chance for this in Ascii */
        *outlen = out - outstart;
        *inlen = in - instart;
        return(-2);
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return(*outlen);
}
299
#endif /* LIBXML_OUTPUT_ENABLED */
300
301
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out. Characters below 0x80 are copied, the others become
 * two-byte sequences. Conversion stops at the first character that does
 * not fit into the remaining space of @out.
 *
 * Returns the number of bytes written if success, or -1 if any argument
 *     is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* base = in;
    unsigned char* outend;
    const unsigned char* inend;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    outend = out + *outlen;
    inend = in + (*inlen);

    /*
     * Space is tested as a difference (outend - out) so that no pointer
     * in front of the output buffer is ever formed, even for *outlen == 0.
     */
    while (in < inend) {
        unsigned int c = *in;

        if (c < 0x80) {
            if (outend - out < 1)
                break;
            *out++ = c;
        } else {
            if (outend - out < 2)
                break;
            *out++ = ((c >> 6) & 0x1F) | 0xC0;
            *out++ = (c & 0x3F) | 0x80;
        }
        in++;
    }
    *outlen = out - outstart;
    *inlen = in - base;
    return(*outlen);
}
349
350
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * Pass-through "conversion" for UTF-8 input: copies as many bytes as fit,
 * i.e. the smaller of *@inlenb and *@outlen. A NULL @inb is an
 * initialization call.
 *
 * Returns the number of bytes written, or -1 if @out, @outlen or @inlenb
 *     is NULL or the number of bytes to copy is negative.
 *     On success *@outlen and *@inlenb both hold the number of bytes
 *     copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int ncopy;

    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);

    if (inb == NULL) {
        /* inb == NULL means output is initialized. */
        *outlen = 0;
        *inlenb = 0;
        return(0);
    }

    ncopy = (*inlenb < *outlen) ? *inlenb : *outlen;
    if (ncopy < 0)
        return(-1);

    /*
     * FIXME: Conversion functions must assure valid UTF-8, so we have
     * to check for UTF-8 validity. Preferably, this converter shouldn't
     * be used at all.
     */
    memcpy(out, inb, ncopy);

    *inlenb = ncopy;
    *outlen = ncopy;
    return(ncopy);
}
396
397
398
#ifdef LIBXML_OUTPUT_ENABLED
399
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out. Only U+0000..U+00FF are representable: single bytes
 * below 0x80 and the two-byte sequences starting with 0xC2 or 0xC3.
 * Everything else, including overlong encodings of representable
 * characters, is a transcoding failure.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails, or -1 if @out, @outlen or @inlen is NULL. A NULL @in is an
 *     initialization call and returns 0.
 * The value of @inlen after return is the number of octets consumed;
 *     a multi-byte sequence cut off by the end of @in is left unconsumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned char* outstart = out;
    unsigned char* outend;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /* initialization, nothing to do */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        unsigned int c = *in;
        int seqlen = 1;

        if (c >= 0x80) {
            int trailing;

            /* trailing byte in leading position, or not UTF-8 at all */
            if ((c < 0xC0) || (c >= 0xF8))
                goto bad_input;

            trailing = (c < 0xE0) ? 1 : ((c < 0xF0) ? 2 : 3);
            /* sequence split across chunks: wait for the remaining bytes */
            if (inend - in - 1 < trailing)
                break;

            /*
             * C0/C1 leads are overlong forms of ASCII, leads above C3
             * encode values beyond U+00FF (or overlong forms of smaller
             * ones): no chance for any of this in IsoLat1.
             */
            if (((c != 0xC2) && (c != 0xC3)) || ((in[1] & 0xC0) != 0x80))
                goto bad_input;

            c = ((c & 0x1F) << 6) | (in[1] & 0x3F);
            seqlen = 2;
        }

        /* c is now a single code point in 0x00..0xFF */
        if (out >= outend)
            break;
        *out++ = c;
        in += seqlen;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return(*outlen);

bad_input:
    *outlen = out - outstart;
    *inlen = in - instart;
    return(-2);
}
488
#endif /* LIBXML_OUTPUT_ENABLED */
489
490
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is read byte by byte, so @inb needs
 * no particular alignment and the result does not depend on the byte
 * order of the host. A trailing odd byte and a high surrogate whose
 * partner has not arrived yet are left unconsumed.
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 *     (a high surrogate is not followed by a low surrogate).
 *     The value of *@inlenb after return is the number of octets consumed.
 *     The value of *@outlen after return is the number of octets produced.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int nbytes;
    int bits;

    if (*outlen == 0) {
        *inlenb = 0;
        return(0);
    }

    /* only whole 16-bit units are looked at */
    nbytes = *inlenb;
    if (nbytes < 0)
        nbytes = 0;
    nbytes -= nbytes % 2;
    inend = inb + nbytes;

    /*
     * The 5 byte margin guarantees that the longest sequence written
     * below (4 bytes) always fits, so no further bound checks are needed.
     */
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
        c = (unsigned int) in[0] | ((unsigned int) in[1] << 8);

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (inend - in < 4)          /* handle split multi-byte characters */
                break;
            d = (unsigned int) in[2] | ((unsigned int) in[3] << 8);
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = in - inb;
                return(-2);
            }
            c = 0x10000 + (((c & 0x03FF) << 10) | (d & 0x03FF));
            in += 4;
        } else {
            in += 2;
        }

        /* c is a single UCS-4 value: emit the lead byte ... */
        if      (c <    0x80) {  *out++=  c;                bits= -6; }
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }

        /* ... followed by the continuation bytes, 6 bits each */
        for ( ; bits >= 0; bits-= 6)
            *out++= ((c >> bits) & 0x3F) | 0x80;
    }
    *outlen = out - outstart;
    *inlenb = in - inb;
    return(*outlen);
}
582
583
#ifdef LIBXML_OUTPUT_ENABLED
584
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. No byte order mark is written. The output is
 * stored byte by byte, so @outb needs no particular alignment and the
 * result does not depend on the byte order of the host.
 *
 * Returns the number of bytes written, -2 if the transcoding failed
 *     (invalid lead or continuation byte, or a value beyond U+10FFFF),
 *     or -1 if @outb, @outlen or @inlen is NULL. A NULL @in is an
 *     initialization call and returns 0.
 *     The value of *@inlen after return is the number of octets consumed;
 *     a sequence cut off by the end of @in, or one that does not fit into
 *     @outb any more, is left unconsumed.
 *     The value of *@outlen after return is the number of octets produced.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    inend = in + *inlen;
    outend = outb + (*outlen / 2) * 2;    /* whole 16-bit units only */
    while (in < inend) {
        const unsigned char* cur = in;
        unsigned int d = *cur++;
        unsigned int c;
        int trailing;

        if      (d < 0x80)  { c= d; trailing= 0; }
        else if (d < 0xC0)  goto bad_input;  /* trailing byte in leading position */
        else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
        else                goto bad_input;  /* no chance for this in UTF-16 */

        /* sequence split across chunks: wait for the remaining bytes */
        if (inend - cur < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *cur++;
            if ((d & 0xC0) != 0x80)
                goto bad_input;
            c = (c << 6) | (d & 0x3F);
        }

        /* c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            out[0] = c & 0xFF;
            out[1] = c >> 8;
            out += 2;
        } else if (c < 0x110000) {
            unsigned int hi, lo;

            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            out[0] = hi & 0xFF;
            out[1] = hi >> 8;
            out[2] = lo & 0xFF;
            out[3] = lo >> 8;
            out += 4;
        } else {
            goto bad_input;                  /* beyond the Unicode range */
        }
        in = cur;
    }
    *outlen = out - outb;
    *inlen = in - instart;
    return(*outlen);

bad_input:
    *outlen = out - outb;
    *inlen = in - instart;
    return(-2);
}
692
693
/**
694
 * UTF8ToUTF16:
695
 * @outb:  a pointer to an array of bytes to store the result
696
 * @outlen:  the length of @outb
697
 * @in:  a pointer to an array of UTF-8 chars
698
 * @inlen:  the length of @in
699
 *
700
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
701
 * block of chars out.
702
 *
703
 * Returns the number of bytes written, or -1 if lack of space, or -2
704
 *     if the transcoding failed.
705
 */
706
static int
707
UTF8ToUTF16(unsigned char* outb, int *outlen,
708
            const unsigned char* in, int *inlen)
709
0
{
710
0
    if (in == NULL) {
711
  /*
712
   * initialization, add the Byte Order Mark for UTF-16LE
713
   */
714
0
        if (*outlen >= 2) {
715
0
      outb[0] = 0xFF;
716
0
      outb[1] = 0xFE;
717
0
      *outlen = 2;
718
0
      *inlen = 0;
719
#ifdef DEBUG_ENCODING
720
            xmlGenericError(xmlGenericErrorContext,
721
        "Added FFFE Byte Order Mark\n");
722
#endif
723
0
      return(2);
724
0
  }
725
0
  *outlen = 0;
726
0
  *inlen = 0;
727
0
  return(0);
728
0
    }
729
0
    return (UTF8ToUTF16LE(outb, outlen, in, inlen));
730
0
}
731
#endif /* LIBXML_OUTPUT_ENABLED */
732
733
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is read byte by byte, so @inb needs
 * no particular alignment and the result does not depend on the byte
 * order of the host. A trailing odd byte and a high surrogate whose
 * partner has not arrived yet are left unconsumed.
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 *     (a high surrogate is not followed by a low surrogate).
 * The value of *@inlenb after return is the number of octets consumed.
 * The value of *@outlen after return is the number of octets produced.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int nbytes;
    int bits;

    if (*outlen == 0) {
        *inlenb = 0;
        return(0);
    }

    /* only whole 16-bit units are looked at */
    nbytes = *inlenb;
    if (nbytes < 0)
        nbytes = 0;
    nbytes -= nbytes % 2;
    inend = inb + nbytes;

    /*
     * The 5 byte margin guarantees that the longest sequence written
     * below (4 bytes) always fits, so no further bound checks are needed.
     */
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
        c = ((unsigned int) in[0] << 8) | (unsigned int) in[1];

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (inend - in < 4)          /* handle split multi-byte characters */
                break;
            d = ((unsigned int) in[2] << 8) | (unsigned int) in[3];
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = in - inb;
                return(-2);
            }
            c = 0x10000 + (((c & 0x03FF) << 10) | (d & 0x03FF));
            in += 4;
        } else {
            in += 2;
        }

        /* c is a single UCS-4 value: emit the lead byte ... */
        if      (c <    0x80) {  *out++=  c;                bits= -6; }
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }

        /* ... followed by the continuation bytes, 6 bits each */
        for ( ; bits >= 0; bits-= 6)
            *out++= ((c >> bits) & 0x3F) | 0x80;
    }
    *outlen = out - outstart;
    *inlenb = in - inb;
    return(*outlen);
}
825
826
#ifdef LIBXML_OUTPUT_ENABLED
827
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. No byte order mark is written. The output is
 * stored byte by byte, so @outb needs no particular alignment and the
 * result does not depend on the byte order of the host.
 *
 * Returns the number of bytes written, -2 if the transcoding failed
 *     (invalid lead or continuation byte, or a value beyond U+10FFFF),
 *     or -1 if @outb, @outlen or @inlen is NULL. A NULL @in is an
 *     initialization call and returns 0.
 *     The value of *@inlen after return is the number of octets consumed;
 *     a sequence cut off by the end of @in, or one that does not fit into
 *     @outb any more, is left unconsumed.
 *     The value of *@outlen after return is the number of octets produced
 *     (always counted in bytes, also on failure).
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    inend = in + *inlen;
    outend = outb + (*outlen / 2) * 2;    /* whole 16-bit units only */
    while (in < inend) {
        const unsigned char* cur = in;
        unsigned int d = *cur++;
        unsigned int c;
        int trailing;

        if      (d < 0x80)  { c= d; trailing= 0; }
        else if (d < 0xC0)  goto bad_input;  /* trailing byte in leading position */
        else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
        else                goto bad_input;  /* no chance for this in UTF-16 */

        /* sequence split across chunks: wait for the remaining bytes */
        if (inend - cur < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *cur++;
            if ((d & 0xC0) != 0x80)
                goto bad_input;
            c = (c << 6) | (d & 0x3F);
        }

        /* c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            out[0] = c >> 8;
            out[1] = c & 0xFF;
            out += 2;
        } else if (c < 0x110000) {
            unsigned int hi, lo;

            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            out[0] = hi >> 8;
            out[1] = hi & 0xFF;
            out[2] = lo >> 8;
            out[3] = lo & 0xFF;
            out += 4;
        } else {
            goto bad_input;                  /* beyond the Unicode range */
        }
        in = cur;
    }
    *outlen = out - outb;
    *inlen = in - instart;
    return(*outlen);

bad_input:
    *outlen = out - outb;
    *inlen = in - instart;
    return(-2);
}
932
#endif /* LIBXML_OUTPUT_ENABLED */
933
934
/************************************************************************
935
 *                  *
936
 *    Generic encoding handling routines      *
937
 *                  *
938
 ************************************************************************/
939
940
/**
941
 * xmlDetectCharEncoding:
942
 * @in:  a pointer to the first bytes of the XML entity, must be at least
943
 *       2 bytes long (at least 4 if encoding is UTF4 variant).
944
 * @len:  pointer to the length of the buffer
945
 *
946
 * Guess the encoding of the entity using the first bytes of the entity content
947
 * according to the non-normative appendix F of the XML-1.0 recommendation.
948
 *
949
 * Returns one of the XML_CHAR_ENCODING_... values.
950
 */
951
xmlCharEncoding
952
xmlDetectCharEncoding(const unsigned char* in, int len)
953
17.9k
{
954
17.9k
    if (in == NULL)
955
0
        return(XML_CHAR_ENCODING_NONE);
956
17.9k
    if (len >= 4) {
957
17.9k
  if ((in[0] == 0x00) && (in[1] == 0x00) &&
958
17.9k
      (in[2] == 0x00) && (in[3] == 0x3C))
959
18
      return(XML_CHAR_ENCODING_UCS4BE);
960
17.9k
  if ((in[0] == 0x3C) && (in[1] == 0x00) &&
961
17.9k
      (in[2] == 0x00) && (in[3] == 0x00))
962
7
      return(XML_CHAR_ENCODING_UCS4LE);
963
17.9k
  if ((in[0] == 0x00) && (in[1] == 0x00) &&
964
17.9k
      (in[2] == 0x3C) && (in[3] == 0x00))
965
1
      return(XML_CHAR_ENCODING_UCS4_2143);
966
17.9k
  if ((in[0] == 0x00) && (in[1] == 0x3C) &&
967
17.9k
      (in[2] == 0x00) && (in[3] == 0x00))
968
2
      return(XML_CHAR_ENCODING_UCS4_3412);
969
17.9k
  if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
970
17.9k
      (in[2] == 0xA7) && (in[3] == 0x94))
971
63
      return(XML_CHAR_ENCODING_EBCDIC);
972
17.9k
  if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
973
17.9k
      (in[2] == 0x78) && (in[3] == 0x6D))
974
3.83k
      return(XML_CHAR_ENCODING_UTF8);
975
  /*
976
   * Although not part of the recommendation, we also
977
   * attempt an "auto-recognition" of UTF-16LE and
978
   * UTF-16BE encodings.
979
   */
980
14.0k
  if ((in[0] == 0x3C) && (in[1] == 0x00) &&
981
14.0k
      (in[2] == 0x3F) && (in[3] == 0x00))
982
175
      return(XML_CHAR_ENCODING_UTF16LE);
983
13.8k
  if ((in[0] == 0x00) && (in[1] == 0x3C) &&
984
13.8k
      (in[2] == 0x00) && (in[3] == 0x3F))
985
109
      return(XML_CHAR_ENCODING_UTF16BE);
986
13.8k
    }
987
13.7k
    if (len >= 3) {
988
  /*
989
   * Errata on XML-1.0 June 20 2001
990
   * We now allow an UTF8 encoded BOM
991
   */
992
13.7k
  if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
993
13.7k
      (in[2] == 0xBF))
994
7
      return(XML_CHAR_ENCODING_UTF8);
995
13.7k
    }
996
    /* For UTF-16 we can recognize by the BOM */
997
13.7k
    if (len >= 2) {
998
13.7k
  if ((in[0] == 0xFE) && (in[1] == 0xFF))
999
252
      return(XML_CHAR_ENCODING_UTF16BE);
1000
13.5k
  if ((in[0] == 0xFF) && (in[1] == 0xFE))
1001
190
      return(XML_CHAR_ENCODING_UTF16LE);
1002
13.5k
    }
1003
13.3k
    return(XML_CHAR_ENCODING_NONE);
1004
13.7k
}
1005
1006
/**
1007
 * xmlCleanupEncodingAliases:
1008
 *
1009
 * Unregisters all aliases
1010
 */
1011
void
1012
0
xmlCleanupEncodingAliases(void) {
1013
0
    int i;
1014
1015
0
    if (xmlCharEncodingAliases == NULL)
1016
0
  return;
1017
1018
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1019
0
  if (xmlCharEncodingAliases[i].name != NULL)
1020
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1021
0
  if (xmlCharEncodingAliases[i].alias != NULL)
1022
0
      xmlFree((char *) xmlCharEncodingAliases[i].alias);
1023
0
    }
1024
0
    xmlCharEncodingAliasesNb = 0;
1025
0
    xmlCharEncodingAliasesMax = 0;
1026
0
    xmlFree(xmlCharEncodingAliases);
1027
0
    xmlCharEncodingAliases = NULL;
1028
0
}
1029
1030
/**
1031
 * xmlGetEncodingAlias:
1032
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1033
 *
1034
 * Lookup an encoding name for the given alias.
1035
 *
1036
 * Returns NULL if not found, otherwise the original name
1037
 */
1038
const char *
1039
4.67k
xmlGetEncodingAlias(const char *alias) {
1040
4.67k
    int i;
1041
4.67k
    char upper[100];
1042
1043
4.67k
    if (alias == NULL)
1044
0
  return(NULL);
1045
1046
4.67k
    if (xmlCharEncodingAliases == NULL)
1047
4.67k
  return(NULL);
1048
1049
0
    for (i = 0;i < 99;i++) {
1050
0
        upper[i] = toupper(alias[i]);
1051
0
  if (upper[i] == 0) break;
1052
0
    }
1053
0
    upper[i] = 0;
1054
1055
    /*
1056
     * Walk down the list looking for a definition of the alias
1057
     */
1058
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1059
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1060
0
      return(xmlCharEncodingAliases[i].name);
1061
0
  }
1062
0
    }
1063
0
    return(NULL);
1064
0
}
1065
1066
/**
1067
 * xmlAddEncodingAlias:
1068
 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1069
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1070
 *
1071
 * Registers an alias @alias for an encoding named @name. Existing alias
1072
 * will be overwritten.
1073
 *
1074
 * Returns 0 in case of success, -1 in case of error
1075
 */
1076
int
1077
0
xmlAddEncodingAlias(const char *name, const char *alias) {
1078
0
    int i;
1079
0
    char upper[100];
1080
1081
0
    if ((name == NULL) || (alias == NULL))
1082
0
  return(-1);
1083
1084
0
    for (i = 0;i < 99;i++) {
1085
0
        upper[i] = toupper(alias[i]);
1086
0
  if (upper[i] == 0) break;
1087
0
    }
1088
0
    upper[i] = 0;
1089
1090
0
    if (xmlCharEncodingAliases == NULL) {
1091
0
  xmlCharEncodingAliasesNb = 0;
1092
0
  xmlCharEncodingAliasesMax = 20;
1093
0
  xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1094
0
        xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1095
0
  if (xmlCharEncodingAliases == NULL)
1096
0
      return(-1);
1097
0
    } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1098
0
  xmlCharEncodingAliasesMax *= 2;
1099
0
  xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1100
0
        xmlRealloc(xmlCharEncodingAliases,
1101
0
             xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1102
0
    }
1103
    /*
1104
     * Walk down the list looking for a definition of the alias
1105
     */
1106
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1107
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1108
      /*
1109
       * Replace the definition.
1110
       */
1111
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1112
0
      xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1113
0
      return(0);
1114
0
  }
1115
0
    }
1116
    /*
1117
     * Add the definition
1118
     */
1119
0
    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1120
0
    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1121
0
    xmlCharEncodingAliasesNb++;
1122
0
    return(0);
1123
0
}
1124
1125
/**
1126
 * xmlDelEncodingAlias:
1127
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1128
 *
1129
 * Unregisters an encoding alias @alias
1130
 *
1131
 * Returns 0 in case of success, -1 in case of error
1132
 */
1133
int
1134
0
xmlDelEncodingAlias(const char *alias) {
1135
0
    int i;
1136
1137
0
    if (alias == NULL)
1138
0
  return(-1);
1139
1140
0
    if (xmlCharEncodingAliases == NULL)
1141
0
  return(-1);
1142
    /*
1143
     * Walk down the list looking for a definition of the alias
1144
     */
1145
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1146
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1147
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1148
0
      xmlFree((char *) xmlCharEncodingAliases[i].alias);
1149
0
      xmlCharEncodingAliasesNb--;
1150
0
      memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1151
0
        sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1152
0
      return(0);
1153
0
  }
1154
0
    }
1155
0
    return(-1);
1156
0
}
1157
1158
/**
1159
 * xmlParseCharEncoding:
1160
 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1161
 *
1162
 * Compare the string to the encoding schemes already known. Note
1163
 * that the comparison is case insensitive accordingly to the section
1164
 * [XML] 4.3.3 Character Encoding in Entities.
1165
 *
1166
 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1167
 * if not recognized.
1168
 */
1169
xmlCharEncoding
1170
xmlParseCharEncoding(const char* name)
1171
777
{
1172
777
    const char *alias;
1173
777
    char upper[500];
1174
777
    int i;
1175
1176
777
    if (name == NULL)
1177
0
  return(XML_CHAR_ENCODING_NONE);
1178
1179
    /*
1180
     * Do the alias resolution
1181
     */
1182
777
    alias = xmlGetEncodingAlias(name);
1183
777
    if (alias != NULL)
1184
0
  name = alias;
1185
1186
8.30k
    for (i = 0;i < 499;i++) {
1187
8.30k
        upper[i] = toupper(name[i]);
1188
8.30k
  if (upper[i] == 0) break;
1189
8.30k
    }
1190
777
    upper[i] = 0;
1191
1192
777
    if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1193
777
    if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1194
777
    if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1195
1196
    /*
1197
     * NOTE: if we were able to parse this, the endianness of UTF16 is
1198
     *       already found and in use
1199
     */
1200
777
    if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1201
777
    if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1202
1203
777
    if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1204
774
    if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1205
774
    if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1206
1207
    /*
1208
     * NOTE: if we were able to parse this, the endianness of UCS4 is
1209
     *       already found and in use
1210
     */
1211
774
    if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1212
721
    if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1213
721
    if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1214
1215
1216
721
    if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1217
721
    if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1218
699
    if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1219
1220
699
    if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1221
699
    if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1222
672
    if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1223
1224
672
    if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1225
672
    if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1226
672
    if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1227
672
    if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1228
672
    if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1229
672
    if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1230
672
    if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1231
1232
672
    if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1233
672
    if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1234
672
    if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1235
1236
#ifdef DEBUG_ENCODING
1237
    xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1238
#endif
1239
672
    return(XML_CHAR_ENCODING_ERROR);
1240
672
}
1241
1242
/**
1243
 * xmlGetCharEncodingName:
1244
 * @enc:  the encoding
1245
 *
1246
 * The "canonical" name for XML encoding.
1247
 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1248
 * Section 4.3.3  Character Encoding in Entities
1249
 *
1250
 * Returns the canonical name for the given encoding
1251
 */
1252
1253
const char*
1254
108
xmlGetCharEncodingName(xmlCharEncoding enc) {
1255
108
    switch (enc) {
1256
0
        case XML_CHAR_ENCODING_ERROR:
1257
0
      return(NULL);
1258
0
        case XML_CHAR_ENCODING_NONE:
1259
0
      return(NULL);
1260
0
        case XML_CHAR_ENCODING_UTF8:
1261
0
      return("UTF-8");
1262
0
        case XML_CHAR_ENCODING_UTF16LE:
1263
0
      return("UTF-16");
1264
0
        case XML_CHAR_ENCODING_UTF16BE:
1265
0
      return("UTF-16");
1266
0
        case XML_CHAR_ENCODING_EBCDIC:
1267
0
            return("EBCDIC");
1268
53
        case XML_CHAR_ENCODING_UCS4LE:
1269
53
            return("ISO-10646-UCS-4");
1270
0
        case XML_CHAR_ENCODING_UCS4BE:
1271
0
            return("ISO-10646-UCS-4");
1272
1
        case XML_CHAR_ENCODING_UCS4_2143:
1273
1
            return("ISO-10646-UCS-4");
1274
2
        case XML_CHAR_ENCODING_UCS4_3412:
1275
2
            return("ISO-10646-UCS-4");
1276
3
        case XML_CHAR_ENCODING_UCS2:
1277
3
            return("ISO-10646-UCS-2");
1278
22
        case XML_CHAR_ENCODING_8859_1:
1279
22
      return("ISO-8859-1");
1280
27
        case XML_CHAR_ENCODING_8859_2:
1281
27
      return("ISO-8859-2");
1282
0
        case XML_CHAR_ENCODING_8859_3:
1283
0
      return("ISO-8859-3");
1284
0
        case XML_CHAR_ENCODING_8859_4:
1285
0
      return("ISO-8859-4");
1286
0
        case XML_CHAR_ENCODING_8859_5:
1287
0
      return("ISO-8859-5");
1288
0
        case XML_CHAR_ENCODING_8859_6:
1289
0
      return("ISO-8859-6");
1290
0
        case XML_CHAR_ENCODING_8859_7:
1291
0
      return("ISO-8859-7");
1292
0
        case XML_CHAR_ENCODING_8859_8:
1293
0
      return("ISO-8859-8");
1294
0
        case XML_CHAR_ENCODING_8859_9:
1295
0
      return("ISO-8859-9");
1296
0
        case XML_CHAR_ENCODING_2022_JP:
1297
0
            return("ISO-2022-JP");
1298
0
        case XML_CHAR_ENCODING_SHIFT_JIS:
1299
0
            return("Shift-JIS");
1300
0
        case XML_CHAR_ENCODING_EUC_JP:
1301
0
            return("EUC-JP");
1302
0
  case XML_CHAR_ENCODING_ASCII:
1303
0
      return(NULL);
1304
108
    }
1305
0
    return(NULL);
1306
108
}
1307
1308
/************************************************************************
1309
 *                  *
1310
 *      Char encoding handlers        *
1311
 *                  *
1312
 ************************************************************************/
1313
1314
1315
/* the size should be growable, but it's not a big deal ... */
1316
9
#define MAX_ENCODING_HANDLERS 50
1317
static xmlCharEncodingHandlerPtr *handlers = NULL;
1318
static int nbCharEncodingHandler = 0;
1319
1320
/*
1321
 * The default is UTF-8 for XML, that's also the default used for the
1322
 * parser internals, so the default encoding handler is NULL
1323
 */
1324
1325
static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1326
1327
/**
1328
 * xmlNewCharEncodingHandler:
1329
 * @name:  the encoding name, in UTF-8 format (ASCII actually)
1330
 * @input:  the xmlCharEncodingInputFunc to read that encoding
1331
 * @output:  the xmlCharEncodingOutputFunc to write that encoding
1332
 *
1333
 * Create and registers an xmlCharEncodingHandler.
1334
 *
1335
 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1336
 */
1337
xmlCharEncodingHandlerPtr
1338
xmlNewCharEncodingHandler(const char *name,
1339
                          xmlCharEncodingInputFunc input,
1340
8
                          xmlCharEncodingOutputFunc output) {
1341
8
    xmlCharEncodingHandlerPtr handler;
1342
8
    const char *alias;
1343
8
    char upper[500];
1344
8
    int i;
1345
8
    char *up = NULL;
1346
1347
    /*
1348
     * Do the alias resolution
1349
     */
1350
8
    alias = xmlGetEncodingAlias(name);
1351
8
    if (alias != NULL)
1352
0
  name = alias;
1353
1354
    /*
1355
     * Keep only the uppercase version of the encoding.
1356
     */
1357
8
    if (name == NULL) {
1358
0
        xmlEncodingErr(XML_I18N_NO_NAME,
1359
0
           "xmlNewCharEncodingHandler : no name !\n", NULL);
1360
0
  return(NULL);
1361
0
    }
1362
62
    for (i = 0;i < 499;i++) {
1363
62
        upper[i] = toupper(name[i]);
1364
62
  if (upper[i] == 0) break;
1365
62
    }
1366
8
    upper[i] = 0;
1367
8
    up = xmlMemStrdup(upper);
1368
8
    if (up == NULL) {
1369
0
        xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1370
0
  return(NULL);
1371
0
    }
1372
1373
    /*
1374
     * allocate and fill-up an handler block.
1375
     */
1376
8
    handler = (xmlCharEncodingHandlerPtr)
1377
8
              xmlMalloc(sizeof(xmlCharEncodingHandler));
1378
8
    if (handler == NULL) {
1379
0
        xmlFree(up);
1380
0
        xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1381
0
  return(NULL);
1382
0
    }
1383
8
    memset(handler, 0, sizeof(xmlCharEncodingHandler));
1384
8
    handler->input = input;
1385
8
    handler->output = output;
1386
8
    handler->name = up;
1387
1388
8
#ifdef LIBXML_ICONV_ENABLED
1389
8
    handler->iconv_in = NULL;
1390
8
    handler->iconv_out = NULL;
1391
8
#endif
1392
#ifdef LIBXML_ICU_ENABLED
1393
    handler->uconv_in = NULL;
1394
    handler->uconv_out = NULL;
1395
#endif
1396
1397
    /*
1398
     * registers and returns the handler.
1399
     */
1400
8
    xmlRegisterCharEncodingHandler(handler);
1401
#ifdef DEBUG_ENCODING
1402
    xmlGenericError(xmlGenericErrorContext,
1403
      "Registered encoding handler for %s\n", name);
1404
#endif
1405
8
    return(handler);
1406
8
}
1407
1408
/**
1409
 * xmlInitCharEncodingHandlers:
1410
 *
1411
 * DEPRECATED: This function will be made private. Call xmlInitParser to
1412
 * initialize the library.
1413
 *
1414
 * Initialize the char encoding support, it registers the default
1415
 * encoding supported.
1416
 * NOTE: while public, this function usually doesn't need to be called
1417
 *       in normal processing.
1418
 */
1419
void
1420
1
xmlInitCharEncodingHandlers(void) {
1421
1
    unsigned short int tst = 0x1234;
1422
1
    unsigned char *ptr = (unsigned char *) &tst;
1423
1424
1
    if (handlers != NULL) return;
1425
1426
1
    handlers = (xmlCharEncodingHandlerPtr *)
1427
1
        xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1428
1429
1
    if (*ptr == 0x12) xmlLittleEndian = 0;
1430
1
    else if (*ptr == 0x34) xmlLittleEndian = 1;
1431
0
    else {
1432
0
        xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1433
0
                 "Odd problem at endianness detection\n", NULL);
1434
0
    }
1435
1436
1
    if (handlers == NULL) {
1437
0
        xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
1438
0
  return;
1439
0
    }
1440
1
    xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
1441
1
#ifdef LIBXML_OUTPUT_ENABLED
1442
1
    xmlUTF16LEHandler =
1443
1
          xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1444
1
    xmlUTF16BEHandler =
1445
1
          xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
1446
1
    xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
1447
1
    xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1448
1
    xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
1449
1
    xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
1450
1
#ifdef LIBXML_HTML_ENABLED
1451
1
    xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1452
1
#endif
1453
#else
1454
    xmlUTF16LEHandler =
1455
          xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1456
    xmlUTF16BEHandler =
1457
          xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
1458
    xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
1459
    xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1460
    xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1461
    xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1462
#endif /* LIBXML_OUTPUT_ENABLED */
1463
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
1464
#ifdef LIBXML_ISO8859X_ENABLED
1465
    xmlRegisterCharEncodingHandlersISO8859x ();
1466
#endif
1467
#endif
1468
1469
1
}
1470
1471
/**
1472
 * xmlCleanupCharEncodingHandlers:
1473
 *
1474
 * DEPRECATED: This function will be made private. Call xmlCleanupParser
1475
 * to free global state but see the warnings there. xmlCleanupParser
1476
 * should be only called once at program exit. In most cases, you don't
1477
 * have call cleanup functions at all.
1478
 *
1479
 * Cleanup the memory allocated for the char encoding support, it
1480
 * unregisters all the encoding handlers and the aliases.
1481
 */
1482
void
1483
0
xmlCleanupCharEncodingHandlers(void) {
1484
0
    xmlCleanupEncodingAliases();
1485
1486
0
    if (handlers == NULL) return;
1487
1488
0
    for (;nbCharEncodingHandler > 0;) {
1489
0
        nbCharEncodingHandler--;
1490
0
  if (handlers[nbCharEncodingHandler] != NULL) {
1491
0
      if (handlers[nbCharEncodingHandler]->name != NULL)
1492
0
    xmlFree(handlers[nbCharEncodingHandler]->name);
1493
0
      xmlFree(handlers[nbCharEncodingHandler]);
1494
0
  }
1495
0
    }
1496
0
    xmlFree(handlers);
1497
0
    handlers = NULL;
1498
0
    nbCharEncodingHandler = 0;
1499
0
    xmlDefaultCharEncodingHandler = NULL;
1500
0
}
1501
1502
/**
1503
 * xmlRegisterCharEncodingHandler:
1504
 * @handler:  the xmlCharEncodingHandlerPtr handler block
1505
 *
1506
 * Register the char encoding handler, surprising, isn't it ?
1507
 */
1508
void
1509
8
xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1510
8
    if (handlers == NULL) xmlInitCharEncodingHandlers();
1511
8
    if ((handler == NULL) || (handlers == NULL)) {
1512
0
        xmlEncodingErr(XML_I18N_NO_HANDLER,
1513
0
    "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
1514
0
        goto free_handler;
1515
0
    }
1516
1517
8
    if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1518
0
        xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1519
0
  "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1520
0
                 "MAX_ENCODING_HANDLERS");
1521
0
        goto free_handler;
1522
0
    }
1523
8
    handlers[nbCharEncodingHandler++] = handler;
1524
8
    return;
1525
1526
0
free_handler:
1527
0
    if (handler != NULL) {
1528
0
        if (handler->name != NULL) {
1529
0
            xmlFree(handler->name);
1530
0
        }
1531
0
        xmlFree(handler);
1532
0
    }
1533
0
}
1534
1535
/**
1536
 * xmlGetCharEncodingHandler:
1537
 * @enc:  an xmlCharEncoding value.
1538
 *
1539
 * Search in the registered set the handler able to read/write that encoding.
1540
 *
1541
 * Returns the handler or NULL if not found
1542
 */
1543
xmlCharEncodingHandlerPtr
1544
19.1k
xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1545
19.1k
    xmlCharEncodingHandlerPtr handler;
1546
1547
19.1k
    if (handlers == NULL) xmlInitCharEncodingHandlers();
1548
19.1k
    switch (enc) {
1549
0
        case XML_CHAR_ENCODING_ERROR:
1550
0
      return(NULL);
1551
13.6k
        case XML_CHAR_ENCODING_NONE:
1552
13.6k
      return(NULL);
1553
3.84k
        case XML_CHAR_ENCODING_UTF8:
1554
3.84k
      return(NULL);
1555
730
        case XML_CHAR_ENCODING_UTF16LE:
1556
730
      return(xmlUTF16LEHandler);
1557
722
        case XML_CHAR_ENCODING_UTF16BE:
1558
722
      return(xmlUTF16BEHandler);
1559
126
        case XML_CHAR_ENCODING_EBCDIC:
1560
126
            handler = xmlFindCharEncodingHandler("EBCDIC");
1561
126
            if (handler != NULL) return(handler);
1562
126
            handler = xmlFindCharEncodingHandler("ebcdic");
1563
126
            if (handler != NULL) return(handler);
1564
126
            handler = xmlFindCharEncodingHandler("EBCDIC-US");
1565
126
            if (handler != NULL) return(handler);
1566
0
            handler = xmlFindCharEncodingHandler("IBM-037");
1567
0
            if (handler != NULL) return(handler);
1568
0
      break;
1569
36
        case XML_CHAR_ENCODING_UCS4BE:
1570
36
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1571
36
            if (handler != NULL) return(handler);
1572
36
            handler = xmlFindCharEncodingHandler("UCS-4");
1573
36
            if (handler != NULL) return(handler);
1574
0
            handler = xmlFindCharEncodingHandler("UCS4");
1575
0
            if (handler != NULL) return(handler);
1576
0
      break;
1577
14
        case XML_CHAR_ENCODING_UCS4LE:
1578
14
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1579
14
            if (handler != NULL) return(handler);
1580
14
            handler = xmlFindCharEncodingHandler("UCS-4");
1581
14
            if (handler != NULL) return(handler);
1582
0
            handler = xmlFindCharEncodingHandler("UCS4");
1583
0
            if (handler != NULL) return(handler);
1584
0
      break;
1585
2
        case XML_CHAR_ENCODING_UCS4_2143:
1586
2
      break;
1587
4
        case XML_CHAR_ENCODING_UCS4_3412:
1588
4
      break;
1589
0
        case XML_CHAR_ENCODING_UCS2:
1590
0
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1591
0
            if (handler != NULL) return(handler);
1592
0
            handler = xmlFindCharEncodingHandler("UCS-2");
1593
0
            if (handler != NULL) return(handler);
1594
0
            handler = xmlFindCharEncodingHandler("UCS2");
1595
0
            if (handler != NULL) return(handler);
1596
0
      break;
1597
1598
      /*
1599
       * We used to keep ISO Latin encodings native in the
1600
       * generated data. This led to so many problems that
1601
       * this has been removed. One can still change this
1602
       * back by registering no-ops encoders for those
1603
       */
1604
0
        case XML_CHAR_ENCODING_8859_1:
1605
0
      handler = xmlFindCharEncodingHandler("ISO-8859-1");
1606
0
      if (handler != NULL) return(handler);
1607
0
      break;
1608
0
        case XML_CHAR_ENCODING_8859_2:
1609
0
      handler = xmlFindCharEncodingHandler("ISO-8859-2");
1610
0
      if (handler != NULL) return(handler);
1611
0
      break;
1612
0
        case XML_CHAR_ENCODING_8859_3:
1613
0
      handler = xmlFindCharEncodingHandler("ISO-8859-3");
1614
0
      if (handler != NULL) return(handler);
1615
0
      break;
1616
0
        case XML_CHAR_ENCODING_8859_4:
1617
0
      handler = xmlFindCharEncodingHandler("ISO-8859-4");
1618
0
      if (handler != NULL) return(handler);
1619
0
      break;
1620
0
        case XML_CHAR_ENCODING_8859_5:
1621
0
      handler = xmlFindCharEncodingHandler("ISO-8859-5");
1622
0
      if (handler != NULL) return(handler);
1623
0
      break;
1624
0
        case XML_CHAR_ENCODING_8859_6:
1625
0
      handler = xmlFindCharEncodingHandler("ISO-8859-6");
1626
0
      if (handler != NULL) return(handler);
1627
0
      break;
1628
0
        case XML_CHAR_ENCODING_8859_7:
1629
0
      handler = xmlFindCharEncodingHandler("ISO-8859-7");
1630
0
      if (handler != NULL) return(handler);
1631
0
      break;
1632
0
        case XML_CHAR_ENCODING_8859_8:
1633
0
      handler = xmlFindCharEncodingHandler("ISO-8859-8");
1634
0
      if (handler != NULL) return(handler);
1635
0
      break;
1636
0
        case XML_CHAR_ENCODING_8859_9:
1637
0
      handler = xmlFindCharEncodingHandler("ISO-8859-9");
1638
0
      if (handler != NULL) return(handler);
1639
0
      break;
1640
1641
1642
0
        case XML_CHAR_ENCODING_2022_JP:
1643
0
            handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1644
0
            if (handler != NULL) return(handler);
1645
0
      break;
1646
0
        case XML_CHAR_ENCODING_SHIFT_JIS:
1647
0
            handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1648
0
            if (handler != NULL) return(handler);
1649
0
            handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1650
0
            if (handler != NULL) return(handler);
1651
0
            handler = xmlFindCharEncodingHandler("Shift_JIS");
1652
0
            if (handler != NULL) return(handler);
1653
0
      break;
1654
0
        case XML_CHAR_ENCODING_EUC_JP:
1655
0
            handler = xmlFindCharEncodingHandler("EUC-JP");
1656
0
            if (handler != NULL) return(handler);
1657
0
      break;
1658
0
  default:
1659
0
      break;
1660
19.1k
    }
1661
1662
#ifdef DEBUG_ENCODING
1663
    xmlGenericError(xmlGenericErrorContext,
1664
      "No handler found for encoding %d\n", enc);
1665
#endif
1666
6
    return(NULL);
1667
19.1k
}
1668
1669
/**
1670
 * xmlFindCharEncodingHandler:
1671
 * @name:  a string describing the char encoding.
1672
 *
1673
 * Search in the registered set the handler able to read/write that encoding.
1674
 *
1675
 * Returns the handler or NULL if not found
1676
 */
1677
xmlCharEncodingHandlerPtr
1678
3.88k
xmlFindCharEncodingHandler(const char *name) {
1679
3.88k
    const char *nalias;
1680
3.88k
    const char *norig;
1681
3.88k
    xmlCharEncoding alias;
1682
3.88k
#ifdef LIBXML_ICONV_ENABLED
1683
3.88k
    xmlCharEncodingHandlerPtr enc;
1684
3.88k
    iconv_t icv_in, icv_out;
1685
3.88k
#endif /* LIBXML_ICONV_ENABLED */
1686
#ifdef LIBXML_ICU_ENABLED
1687
    xmlCharEncodingHandlerPtr encu;
1688
    uconv_t *ucv_in, *ucv_out;
1689
#endif /* LIBXML_ICU_ENABLED */
1690
3.88k
    char upper[100];
1691
3.88k
    int i;
1692
1693
3.88k
    if (handlers == NULL) xmlInitCharEncodingHandlers();
1694
3.88k
    if (name == NULL) return(xmlDefaultCharEncodingHandler);
1695
3.88k
    if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1696
1697
    /*
1698
     * Do the alias resolution
1699
     */
1700
3.88k
    norig = name;
1701
3.88k
    nalias = xmlGetEncodingAlias(name);
1702
3.88k
    if (nalias != NULL)
1703
0
  name = nalias;
1704
1705
    /*
1706
     * Check first for directly registered encoding names
1707
     */
1708
27.0k
    for (i = 0;i < 99;i++) {
1709
27.0k
        upper[i] = toupper(name[i]);
1710
27.0k
  if (upper[i] == 0) break;
1711
27.0k
    }
1712
3.88k
    upper[i] = 0;
1713
1714
3.88k
    if (handlers != NULL) {
1715
33.8k
        for (i = 0;i < nbCharEncodingHandler; i++) {
1716
30.2k
            if (!strcmp(upper, handlers[i]->name)) {
1717
#ifdef DEBUG_ENCODING
1718
                xmlGenericError(xmlGenericErrorContext,
1719
                        "Found registered handler for encoding %s\n", name);
1720
#endif
1721
351
                return(handlers[i]);
1722
351
            }
1723
30.2k
        }
1724
3.88k
    }
1725
1726
3.53k
#ifdef LIBXML_ICONV_ENABLED
1727
    /* check whether iconv can handle this */
1728
3.53k
    icv_in = iconv_open("UTF-8", name);
1729
3.53k
    icv_out = iconv_open(name, "UTF-8");
1730
3.53k
    if (icv_in == (iconv_t) -1) {
1731
777
        icv_in = iconv_open("UTF-8", upper);
1732
777
    }
1733
3.53k
    if (icv_out == (iconv_t) -1) {
1734
777
  icv_out = iconv_open(upper, "UTF-8");
1735
777
    }
1736
3.53k
    if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1737
2.75k
      enc = (xmlCharEncodingHandlerPtr)
1738
2.75k
            xmlMalloc(sizeof(xmlCharEncodingHandler));
1739
2.75k
      if (enc == NULL) {
1740
0
          iconv_close(icv_in);
1741
0
          iconv_close(icv_out);
1742
0
    return(NULL);
1743
0
      }
1744
2.75k
            memset(enc, 0, sizeof(xmlCharEncodingHandler));
1745
2.75k
      enc->name = xmlMemStrdup(name);
1746
2.75k
      enc->input = NULL;
1747
2.75k
      enc->output = NULL;
1748
2.75k
      enc->iconv_in = icv_in;
1749
2.75k
      enc->iconv_out = icv_out;
1750
#ifdef DEBUG_ENCODING
1751
            xmlGenericError(xmlGenericErrorContext,
1752
        "Found iconv handler for encoding %s\n", name);
1753
#endif
1754
2.75k
      return enc;
1755
2.75k
    } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1756
0
      xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1757
0
        "iconv : problems with filters for '%s'\n", name);
1758
0
      if (icv_in != (iconv_t) -1)
1759
0
    iconv_close(icv_in);
1760
0
      else
1761
0
    iconv_close(icv_out);
1762
0
    }
1763
777
#endif /* LIBXML_ICONV_ENABLED */
1764
#ifdef LIBXML_ICU_ENABLED
1765
    /* check whether icu can handle this */
1766
    ucv_in = openIcuConverter(name, 1);
1767
    ucv_out = openIcuConverter(name, 0);
1768
    if (ucv_in != NULL && ucv_out != NULL) {
1769
      encu = (xmlCharEncodingHandlerPtr)
1770
             xmlMalloc(sizeof(xmlCharEncodingHandler));
1771
      if (encu == NULL) {
1772
                closeIcuConverter(ucv_in);
1773
                closeIcuConverter(ucv_out);
1774
    return(NULL);
1775
      }
1776
            memset(encu, 0, sizeof(xmlCharEncodingHandler));
1777
      encu->name = xmlMemStrdup(name);
1778
      encu->input = NULL;
1779
      encu->output = NULL;
1780
      encu->uconv_in = ucv_in;
1781
      encu->uconv_out = ucv_out;
1782
#ifdef DEBUG_ENCODING
1783
            xmlGenericError(xmlGenericErrorContext,
1784
        "Found ICU converter handler for encoding %s\n", name);
1785
#endif
1786
      return encu;
1787
    } else if (ucv_in != NULL || ucv_out != NULL) {
1788
            closeIcuConverter(ucv_in);
1789
            closeIcuConverter(ucv_out);
1790
      xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1791
        "ICU converter : problems with filters for '%s'\n", name);
1792
    }
1793
#endif /* LIBXML_ICU_ENABLED */
1794
1795
#ifdef DEBUG_ENCODING
1796
    xmlGenericError(xmlGenericErrorContext,
1797
      "No handler found for encoding %s\n", name);
1798
#endif
1799
1800
    /*
1801
     * Fallback using the canonical names
1802
     */
1803
777
    alias = xmlParseCharEncoding(norig);
1804
777
    if (alias != XML_CHAR_ENCODING_ERROR) {
1805
105
        const char* canon;
1806
105
        canon = xmlGetCharEncodingName(alias);
1807
105
        if ((canon != NULL) && (strcmp(name, canon))) {
1808
51
      return(xmlFindCharEncodingHandler(canon));
1809
51
        }
1810
105
    }
1811
1812
    /* If "none of the above", give up */
1813
726
    return(NULL);
1814
777
}
1815
1816
/************************************************************************
1817
 *                  *
1818
 *    ICONV based generic conversion functions    *
1819
 *                  *
1820
 ************************************************************************/
1821
1822
#ifdef LIBXML_ICONV_ENABLED
1823
/**
 * xmlIconvWrapper:
 * @cd:  iconv conversion descriptor
 * @out:  destination byte array receiving the converted data
 * @outlen:  on entry the capacity of @out, on return the bytes produced
 * @in:  source byte array
 * @inlen:  on entry the size of @in, on return the bytes consumed
 *
 * Runs one iconv() call over @in and maps its outcome onto the return
 * codes used by the encoding handlers.
 *
 * Returns 0 if success, or
 *     -1 by lack of space (errno is E2BIG), or when one of the pointer
 *        arguments is NULL (in which case *@outlen is set to 0 when
 *        @outlen itself is not NULL and nothing else is touched), or
 *     -2 if the transcoding fails (errno is EILSEQ), or
 *     -3 for every other failure, which includes EINVAL, i.e. input
 *        that stops in the middle of a sequence.
 */
static int
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    const char *src = (const char *) in;
    char *dst = (char *) out;
    size_t srcLeft;
    size_t dstLeft;
    size_t rc;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
        if (outlen != NULL)
            *outlen = 0;
        return(-1);
    }

    srcLeft = *inlen;
    dstLeft = *outlen;
    /*
     * The (void *) cast copes with iconv prototypes that disagree on
     * the const-ness of the input pointer.
     */
    rc = iconv(cd, (void *) &src, &srcLeft, &dst, &dstLeft);
    /* iconv leaves the unprocessed amounts behind; turn them into counts */
    *inlen -= srcLeft;
    *outlen -= dstLeft;

    if ((rc != (size_t) -1) && (srcLeft == 0))
        return(0);

#ifdef EILSEQ
    if (errno == EILSEQ)
        return(-2);
#endif
#ifdef E2BIG
    if (errno == E2BIG)
        return(-1);
#endif
    /* EINVAL and anything unexpected are both reported as -3 */
    return(-3);
}
1883
#endif /* LIBXML_ICONV_ENABLED */
1884
1885
/************************************************************************
1886
 *                  *
1887
 *    ICU based generic conversion functions    *
1888
 *                  *
1889
 ************************************************************************/
1890
1891
#ifdef LIBXML_ICU_ENABLED
1892
/**
1893
 * xmlUconvWrapper:
1894
 * @cd: ICU uconverter data structure
1895
 * @toUnicode : non-zero if toUnicode. 0 otherwise.
1896
 * @out:  a pointer to an array of bytes to store the result
1897
 * @outlen:  the length of @out
1898
 * @in:  a pointer to an array of input bytes
1899
 * @inlen:  the length of @in
1900
 * @flush: if true, indicates end of input
1901
 *
1902
 * Returns 0 if success, or
1903
 *     -1 by lack of space, or
1904
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
1905
 *        the result of transformation can't fit into the encoding we want), or
1906
 *     -3 if there the last byte can't form a single output char.
1907
 *
1908
 * The value of @inlen after return is the number of octets consumed
1909
 *     as the return value is positive, else unpredictable.
1910
 * The value of @outlen after return is the number of octets produced.
1911
 */
1912
static int
1913
xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
1914
                const unsigned char *in, int *inlen, int flush) {
1915
    const char *ucv_in = (const char *) in;
1916
    char *ucv_out = (char *) out;
1917
    UErrorCode err = U_ZERO_ERROR;
1918
1919
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1920
        if (outlen != NULL) *outlen = 0;
1921
        return(-1);
1922
    }
1923
1924
    if (toUnicode) {
1925
        /* encoding => UTF-16 => UTF-8 */
1926
        ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
1927
                       &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1928
                       &cd->pivot_source, &cd->pivot_target,
1929
                       cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1930
    } else {
1931
        /* UTF-8 => UTF-16 => encoding */
1932
        ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
1933
                       &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1934
                       &cd->pivot_source, &cd->pivot_target,
1935
                       cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1936
    }
1937
    *inlen = ucv_in - (const char*) in;
1938
    *outlen = ucv_out - (char *) out;
1939
    if (U_SUCCESS(err)) {
1940
        /* reset pivot buf if this is the last call for input (flush==TRUE) */
1941
        if (flush)
1942
            cd->pivot_source = cd->pivot_target = cd->pivot_buf;
1943
        return 0;
1944
    }
1945
    if (err == U_BUFFER_OVERFLOW_ERROR)
1946
        return -1;
1947
    if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
1948
        return -2;
1949
    return -3;
1950
}
1951
#endif /* LIBXML_ICU_ENABLED */
1952
1953
/************************************************************************
1954
 *                  *
1955
 *    The real API used by libxml for on-the-fly conversion *
1956
 *                  *
1957
 ************************************************************************/
1958
1959
/**
1960
 * xmlEncInputChunk:
1961
 * @handler:  encoding handler
1962
 * @out:  a pointer to an array of bytes to store the result
1963
 * @outlen:  the length of @out
1964
 * @in:  a pointer to an array of input bytes
1965
 * @inlen:  the length of @in
1966
 * @flush:  flush (ICU-related)
1967
 *
1968
 * Returns 0 if success, or
1969
 *     -1 by lack of space, or
1970
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
1971
 *        the result of transformation can't fit into the encoding we want), or
1972
 *     -3 if there the last byte can't form a single output char.
1973
 *
1974
 * The value of @inlen after return is the number of octets consumed
1975
 *     as the return value is 0, else unpredictable.
1976
 * The value of @outlen after return is the number of octets produced.
1977
 */
1978
static int
1979
xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
1980
150k
                 int *outlen, const unsigned char *in, int *inlen, int flush) {
1981
150k
    int ret;
1982
150k
    (void)flush;
1983
1984
150k
    if (handler->input != NULL) {
1985
85.1k
        ret = handler->input(out, outlen, in, inlen);
1986
85.1k
        if (ret > 0)
1987
63.6k
           ret = 0;
1988
85.1k
    }
1989
65.4k
#ifdef LIBXML_ICONV_ENABLED
1990
65.4k
    else if (handler->iconv_in != NULL) {
1991
65.4k
        ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
1992
65.4k
    }
1993
1
#endif /* LIBXML_ICONV_ENABLED */
1994
#ifdef LIBXML_ICU_ENABLED
1995
    else if (handler->uconv_in != NULL) {
1996
        ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen,
1997
                              flush);
1998
    }
1999
#endif /* LIBXML_ICU_ENABLED */
2000
1
    else {
2001
1
        *outlen = 0;
2002
1
        *inlen = 0;
2003
1
        ret = -2;
2004
1
    }
2005
2006
150k
    return(ret);
2007
150k
}
2008
2009
/**
2010
 * xmlEncOutputChunk:
2011
 * @handler:  encoding handler
2012
 * @out:  a pointer to an array of bytes to store the result
2013
 * @outlen:  the length of @out
2014
 * @in:  a pointer to an array of input bytes
2015
 * @inlen:  the length of @in
2016
 *
2017
 * Returns 0 if success, or
2018
 *     -1 by lack of space, or
2019
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2020
 *        the result of transformation can't fit into the encoding we want), or
2021
 *     -3 if there the last byte can't form a single output char.
2022
 *     -4 if no output function was found.
2023
 *
2024
 * The value of @inlen after return is the number of octets consumed
2025
 *     as the return value is 0, else unpredictable.
2026
 * The value of @outlen after return is the number of octets produced.
2027
 */
2028
static int
2029
xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2030
0
                  int *outlen, const unsigned char *in, int *inlen) {
2031
0
    int ret;
2032
2033
0
    if (handler->output != NULL) {
2034
0
        ret = handler->output(out, outlen, in, inlen);
2035
0
        if (ret > 0)
2036
0
           ret = 0;
2037
0
    }
2038
0
#ifdef LIBXML_ICONV_ENABLED
2039
0
    else if (handler->iconv_out != NULL) {
2040
0
        ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
2041
0
    }
2042
0
#endif /* LIBXML_ICONV_ENABLED */
2043
#ifdef LIBXML_ICU_ENABLED
2044
    else if (handler->uconv_out != NULL) {
2045
        ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen,
2046
                              1);
2047
    }
2048
#endif /* LIBXML_ICU_ENABLED */
2049
0
    else {
2050
0
        *outlen = 0;
2051
0
        *inlen = 0;
2052
0
        ret = -4;
2053
0
    }
2054
2055
0
    return(ret);
2056
0
}
2057
2058
/**
2059
 * xmlCharEncFirstLineInt:
2060
 * @handler:  char encoding transformation data structure
2061
 * @out:  an xmlBuffer for the output.
2062
 * @in:  an xmlBuffer for the input
2063
 * @len:  number of bytes to convert for the first line, or -1
2064
 *
2065
 * Front-end for the encoding handler input function, but handle only
2066
 * the very first line, i.e. limit itself to 45 chars.
2067
 *
2068
 * Returns the number of byte written if success, or
2069
 *     -1 general error
2070
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2071
 *        the result of transformation can't fit into the encoding we want), or
2072
 */
2073
int
2074
xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2075
0
                       xmlBufferPtr in, int len) {
2076
0
    int ret;
2077
0
    int written;
2078
0
    int toconv;
2079
2080
0
    if (handler == NULL) return(-1);
2081
0
    if (out == NULL) return(-1);
2082
0
    if (in == NULL) return(-1);
2083
2084
    /* calculate space available */
2085
0
    written = out->size - out->use - 1; /* count '\0' */
2086
0
    toconv = in->use;
2087
    /*
2088
     * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2089
     * 45 chars should be sufficient to reach the end of the encoding
2090
     * declaration without going too far inside the document content.
2091
     * on UTF-16 this means 90bytes, on UCS4 this means 180
2092
     * The actual value depending on guessed encoding is passed as @len
2093
     * if provided
2094
     */
2095
0
    if (len >= 0) {
2096
0
        if (toconv > len)
2097
0
            toconv = len;
2098
0
    } else {
2099
0
        if (toconv > 180)
2100
0
            toconv = 180;
2101
0
    }
2102
0
    if (toconv * 2 >= written) {
2103
0
        xmlBufferGrow(out, toconv * 2);
2104
0
  written = out->size - out->use - 1;
2105
0
    }
2106
2107
0
    ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2108
0
                           in->content, &toconv, 0);
2109
0
    xmlBufferShrink(in, toconv);
2110
0
    out->use += written;
2111
0
    out->content[out->use] = 0;
2112
0
    if (ret == -1) ret = -3;
2113
2114
#ifdef DEBUG_ENCODING
2115
    switch (ret) {
2116
        case 0:
2117
      xmlGenericError(xmlGenericErrorContext,
2118
        "converted %d bytes to %d bytes of input\n",
2119
              toconv, written);
2120
      break;
2121
        case -1:
2122
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2123
              toconv, written, in->use);
2124
      break;
2125
        case -2:
2126
      xmlGenericError(xmlGenericErrorContext,
2127
        "input conversion failed due to input error\n");
2128
      break;
2129
        case -3:
2130
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2131
              toconv, written, in->use);
2132
      break;
2133
  default:
2134
      xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2135
    }
2136
#endif /* DEBUG_ENCODING */
2137
    /*
2138
     * Ignore when input buffer is not on a boundary
2139
     */
2140
0
    if (ret == -3) ret = 0;
2141
0
    if (ret == -1) ret = 0;
2142
0
    return(written ? written : ret);
2143
0
}
2144
2145
/**
2146
 * xmlCharEncFirstLine:
2147
 * @handler:  char encoding transformation data structure
2148
 * @out:  an xmlBuffer for the output.
2149
 * @in:  an xmlBuffer for the input
2150
 *
2151
 * Front-end for the encoding handler input function, but handle only
2152
 * the very first line, i.e. limit itself to 45 chars.
2153
 *
2154
 * Returns the number of byte written if success, or
2155
 *     -1 general error
2156
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2157
 *        the result of transformation can't fit into the encoding we want), or
2158
 */
2159
int
2160
xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2161
0
                 xmlBufferPtr in) {
2162
0
    return(xmlCharEncFirstLineInt(handler, out, in, -1));
2163
0
}
2164
2165
/**
2166
 * xmlCharEncFirstLineInput:
2167
 * @input: a parser input buffer
2168
 * @len:  number of bytes to convert for the first line, or -1
2169
 *
2170
 * Front-end for the encoding handler input function, but handle only
2171
 * the very first line. Point is that this is based on autodetection
2172
 * of the encoding and once that first line is converted we may find
2173
 * out that a different decoder is needed to process the input.
2174
 *
2175
 * Returns the number of byte written if success, or
2176
 *     -1 general error
2177
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2178
 *        the result of transformation can't fit into the encoding we want), or
2179
 */
2180
int
2181
xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
2182
2.92k
{
2183
2.92k
    int ret;
2184
2.92k
    size_t written;
2185
2.92k
    size_t toconv;
2186
2.92k
    int c_in;
2187
2.92k
    int c_out;
2188
2.92k
    xmlBufPtr in;
2189
2.92k
    xmlBufPtr out;
2190
2191
2.92k
    if ((input == NULL) || (input->encoder == NULL) ||
2192
2.92k
        (input->buffer == NULL) || (input->raw == NULL))
2193
0
        return (-1);
2194
2.92k
    out = input->buffer;
2195
2.92k
    in = input->raw;
2196
2197
2.92k
    toconv = xmlBufUse(in);
2198
2.92k
    if (toconv == 0)
2199
16
        return (0);
2200
2.91k
    written = xmlBufAvail(out);
2201
    /*
2202
     * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2203
     * 45 chars should be sufficient to reach the end of the encoding
2204
     * declaration without going too far inside the document content.
2205
     * on UTF-16 this means 90bytes, on UCS4 this means 180
2206
     * The actual value depending on guessed encoding is passed as @len
2207
     * if provided
2208
     */
2209
2.91k
    if (len >= 0) {
2210
0
        if (toconv > (unsigned int) len)
2211
0
            toconv = len;
2212
2.91k
    } else {
2213
2.91k
        if (toconv > 180)
2214
2.07k
            toconv = 180;
2215
2.91k
    }
2216
2.91k
    if (toconv * 2 >= written) {
2217
0
        xmlBufGrow(out, toconv * 2);
2218
0
        written = xmlBufAvail(out);
2219
0
    }
2220
2.91k
    if (written > 360)
2221
2.91k
        written = 360;
2222
2223
2.91k
    c_in = toconv;
2224
2.91k
    c_out = written;
2225
2.91k
    ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2226
2.91k
                           xmlBufContent(in), &c_in, 0);
2227
2.91k
    xmlBufShrink(in, c_in);
2228
2.91k
    xmlBufAddLen(out, c_out);
2229
2.91k
    if (ret == -1)
2230
2.18k
        ret = -3;
2231
2232
2.91k
    switch (ret) {
2233
690
        case 0:
2234
#ifdef DEBUG_ENCODING
2235
            xmlGenericError(xmlGenericErrorContext,
2236
                            "converted %d bytes to %d bytes of input\n",
2237
                            c_in, c_out);
2238
#endif
2239
690
            break;
2240
0
        case -1:
2241
#ifdef DEBUG_ENCODING
2242
            xmlGenericError(xmlGenericErrorContext,
2243
                         "converted %d bytes to %d bytes of input, %d left\n",
2244
                            c_in, c_out, (int)xmlBufUse(in));
2245
#endif
2246
0
            break;
2247
2.18k
        case -3:
2248
#ifdef DEBUG_ENCODING
2249
            xmlGenericError(xmlGenericErrorContext,
2250
                        "converted %d bytes to %d bytes of input, %d left\n",
2251
                            c_in, c_out, (int)xmlBufUse(in));
2252
#endif
2253
2.18k
            break;
2254
35
        case -2: {
2255
35
            char buf[50];
2256
35
            const xmlChar *content = xmlBufContent(in);
2257
2258
35
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2259
35
         content[0], content[1],
2260
35
         content[2], content[3]);
2261
35
      buf[49] = 0;
2262
35
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2263
35
        "input conversion failed due to input error, bytes %s\n",
2264
35
               buf);
2265
35
        }
2266
2.91k
    }
2267
    /*
2268
     * Ignore when input buffer is not on a boundary
2269
     */
2270
2.91k
    if (ret == -3) ret = 0;
2271
2.91k
    if (ret == -1) ret = 0;
2272
2.91k
    return(c_out ? c_out : ret);
2273
2.91k
}
2274
2275
/**
2276
 * xmlCharEncInput:
2277
 * @input: a parser input buffer
2278
 * @flush: try to flush all the raw buffer
2279
 *
2280
 * Generic front-end for the encoding handler on parser input
2281
 *
2282
 * Returns the number of byte written if success, or
2283
 *     -1 general error
2284
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2285
 *        the result of transformation can't fit into the encoding we want), or
2286
 */
2287
int
2288
xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
2289
148k
{
2290
148k
    int ret;
2291
148k
    size_t written;
2292
148k
    size_t toconv;
2293
148k
    int c_in;
2294
148k
    int c_out;
2295
148k
    xmlBufPtr in;
2296
148k
    xmlBufPtr out;
2297
2298
148k
    if ((input == NULL) || (input->encoder == NULL) ||
2299
148k
        (input->buffer == NULL) || (input->raw == NULL))
2300
0
        return (-1);
2301
148k
    out = input->buffer;
2302
148k
    in = input->raw;
2303
2304
148k
    toconv = xmlBufUse(in);
2305
148k
    if (toconv == 0)
2306
747
        return (0);
2307
147k
    if ((toconv > 64 * 1024) && (flush == 0))
2308
2
        toconv = 64 * 1024;
2309
147k
    written = xmlBufAvail(out);
2310
147k
    if (toconv * 2 >= written) {
2311
17.2k
        xmlBufGrow(out, toconv * 2);
2312
17.2k
        written = xmlBufAvail(out);
2313
17.2k
    }
2314
147k
    if ((written > 128 * 1024) && (flush == 0))
2315
3
        written = 128 * 1024;
2316
2317
147k
    c_in = toconv;
2318
147k
    c_out = written;
2319
147k
    ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2320
147k
                           xmlBufContent(in), &c_in, flush);
2321
147k
    xmlBufShrink(in, c_in);
2322
147k
    xmlBufAddLen(out, c_out);
2323
147k
    if (ret == -1)
2324
24.7k
        ret = -3;
2325
2326
147k
    switch (ret) {
2327
121k
        case 0:
2328
#ifdef DEBUG_ENCODING
2329
            xmlGenericError(xmlGenericErrorContext,
2330
                            "converted %d bytes to %d bytes of input\n",
2331
                            c_in, c_out);
2332
#endif
2333
121k
            break;
2334
0
        case -1:
2335
#ifdef DEBUG_ENCODING
2336
            xmlGenericError(xmlGenericErrorContext,
2337
                         "converted %d bytes to %d bytes of input, %d left\n",
2338
                            c_in, c_out, (int)xmlBufUse(in));
2339
#endif
2340
0
            break;
2341
25.2k
        case -3:
2342
#ifdef DEBUG_ENCODING
2343
            xmlGenericError(xmlGenericErrorContext,
2344
                        "converted %d bytes to %d bytes of input, %d left\n",
2345
                            c_in, c_out, (int)xmlBufUse(in));
2346
#endif
2347
25.2k
            break;
2348
636
        case -2: {
2349
636
            char buf[50];
2350
636
            const xmlChar *content = xmlBufContent(in);
2351
2352
636
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2353
636
         content[0], content[1],
2354
636
         content[2], content[3]);
2355
636
      buf[49] = 0;
2356
636
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2357
636
        "input conversion failed due to input error, bytes %s\n",
2358
636
               buf);
2359
636
        }
2360
147k
    }
2361
    /*
2362
     * Ignore when input buffer is not on a boundary
2363
     */
2364
147k
    if (ret == -3)
2365
25.2k
        ret = 0;
2366
147k
    return (c_out? c_out : ret);
2367
147k
}
2368
2369
/**
2370
 * xmlCharEncInFunc:
2371
 * @handler:  char encoding transformation data structure
2372
 * @out:  an xmlBuffer for the output.
2373
 * @in:  an xmlBuffer for the input
2374
 *
2375
 * Generic front-end for the encoding handler input function
2376
 *
2377
 * Returns the number of byte written if success, or
2378
 *     -1 general error
2379
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2380
 *        the result of transformation can't fit into the encoding we want), or
2381
 */
2382
int
2383
xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2384
                 xmlBufferPtr in)
2385
0
{
2386
0
    int ret;
2387
0
    int written;
2388
0
    int toconv;
2389
2390
0
    if (handler == NULL)
2391
0
        return (-1);
2392
0
    if (out == NULL)
2393
0
        return (-1);
2394
0
    if (in == NULL)
2395
0
        return (-1);
2396
2397
0
    toconv = in->use;
2398
0
    if (toconv == 0)
2399
0
        return (0);
2400
0
    written = out->size - out->use -1; /* count '\0' */
2401
0
    if (toconv * 2 >= written) {
2402
0
        xmlBufferGrow(out, out->size + toconv * 2);
2403
0
        written = out->size - out->use - 1;
2404
0
    }
2405
0
    ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2406
0
                           in->content, &toconv, 1);
2407
0
    xmlBufferShrink(in, toconv);
2408
0
    out->use += written;
2409
0
    out->content[out->use] = 0;
2410
0
    if (ret == -1)
2411
0
        ret = -3;
2412
2413
0
    switch (ret) {
2414
0
        case 0:
2415
#ifdef DEBUG_ENCODING
2416
            xmlGenericError(xmlGenericErrorContext,
2417
                            "converted %d bytes to %d bytes of input\n",
2418
                            toconv, written);
2419
#endif
2420
0
            break;
2421
0
        case -1:
2422
#ifdef DEBUG_ENCODING
2423
            xmlGenericError(xmlGenericErrorContext,
2424
                         "converted %d bytes to %d bytes of input, %d left\n",
2425
                            toconv, written, in->use);
2426
#endif
2427
0
            break;
2428
0
        case -3:
2429
#ifdef DEBUG_ENCODING
2430
            xmlGenericError(xmlGenericErrorContext,
2431
                        "converted %d bytes to %d bytes of input, %d left\n",
2432
                            toconv, written, in->use);
2433
#endif
2434
0
            break;
2435
0
        case -2: {
2436
0
            char buf[50];
2437
2438
0
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2439
0
         in->content[0], in->content[1],
2440
0
         in->content[2], in->content[3]);
2441
0
      buf[49] = 0;
2442
0
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2443
0
        "input conversion failed due to input error, bytes %s\n",
2444
0
               buf);
2445
0
        }
2446
0
    }
2447
    /*
2448
     * Ignore when input buffer is not on a boundary
2449
     */
2450
0
    if (ret == -3)
2451
0
        ret = 0;
2452
0
    return (written? written : ret);
2453
0
}
2454
2455
#ifdef LIBXML_OUTPUT_ENABLED
2456
/**
2457
 * xmlCharEncOutput:
2458
 * @output: a parser output buffer
2459
 * @init: is this an initialization call without data
2460
 *
2461
 * Generic front-end for the encoding handler on parser output
2462
 * a first call with @init == 1 has to be made first to initiate the
2463
 * output in case of non-stateless encoding needing to initiate their
2464
 * state or the output (like the BOM in UTF16).
2465
 * In case of UTF8 sequence conversion errors for the given encoder,
2466
 * the content will be automatically remapped to a CharRef sequence.
2467
 *
2468
 * Returns the number of byte written if success, or
2469
 *     -1 general error
2470
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2471
 *        the result of transformation can't fit into the encoding we want), or
2472
 */
2473
int
2474
xmlCharEncOutput(xmlOutputBufferPtr output, int init)
2475
0
{
2476
0
    int ret;
2477
0
    size_t written;
2478
0
    int writtentot = 0;
2479
0
    size_t toconv;
2480
0
    int c_in;
2481
0
    int c_out;
2482
0
    xmlBufPtr in;
2483
0
    xmlBufPtr out;
2484
2485
0
    if ((output == NULL) || (output->encoder == NULL) ||
2486
0
        (output->buffer == NULL) || (output->conv == NULL))
2487
0
        return (-1);
2488
0
    out = output->conv;
2489
0
    in = output->buffer;
2490
2491
0
retry:
2492
2493
0
    written = xmlBufAvail(out);
2494
2495
    /*
2496
     * First specific handling of the initialization call
2497
     */
2498
0
    if (init) {
2499
0
        c_in = 0;
2500
0
        c_out = written;
2501
        /* TODO: Check return value. */
2502
0
        xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2503
0
                          NULL, &c_in);
2504
0
        xmlBufAddLen(out, c_out);
2505
#ifdef DEBUG_ENCODING
2506
  xmlGenericError(xmlGenericErrorContext,
2507
    "initialized encoder\n");
2508
#endif
2509
0
        return(c_out);
2510
0
    }
2511
2512
    /*
2513
     * Conversion itself.
2514
     */
2515
0
    toconv = xmlBufUse(in);
2516
0
    if (toconv == 0)
2517
0
        return (writtentot);
2518
0
    if (toconv > 64 * 1024)
2519
0
        toconv = 64 * 1024;
2520
0
    if (toconv * 4 >= written) {
2521
0
        xmlBufGrow(out, toconv * 4);
2522
0
        written = xmlBufAvail(out);
2523
0
    }
2524
0
    if (written > 256 * 1024)
2525
0
        written = 256 * 1024;
2526
2527
0
    c_in = toconv;
2528
0
    c_out = written;
2529
0
    ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2530
0
                            xmlBufContent(in), &c_in);
2531
0
    xmlBufShrink(in, c_in);
2532
0
    xmlBufAddLen(out, c_out);
2533
0
    writtentot += c_out;
2534
0
    if (ret == -1) {
2535
0
        if (c_out > 0) {
2536
            /* Can be a limitation of iconv or uconv */
2537
0
            goto retry;
2538
0
        }
2539
0
        ret = -3;
2540
0
    }
2541
2542
    /*
2543
     * Attempt to handle error cases
2544
     */
2545
0
    switch (ret) {
2546
0
        case 0:
2547
#ifdef DEBUG_ENCODING
2548
      xmlGenericError(xmlGenericErrorContext,
2549
        "converted %d bytes to %d bytes of output\n",
2550
              c_in, c_out);
2551
#endif
2552
0
      break;
2553
0
        case -1:
2554
#ifdef DEBUG_ENCODING
2555
      xmlGenericError(xmlGenericErrorContext,
2556
        "output conversion failed by lack of space\n");
2557
#endif
2558
0
      break;
2559
0
        case -3:
2560
#ifdef DEBUG_ENCODING
2561
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2562
              c_in, c_out, (int) xmlBufUse(in));
2563
#endif
2564
0
      break;
2565
0
        case -4:
2566
0
            xmlEncodingErr(XML_I18N_NO_OUTPUT,
2567
0
                           "xmlCharEncOutFunc: no output function !\n", NULL);
2568
0
            ret = -1;
2569
0
            break;
2570
0
        case -2: {
2571
0
      xmlChar charref[20];
2572
0
      int len = (int) xmlBufUse(in);
2573
0
            xmlChar *content = xmlBufContent(in);
2574
0
      int cur, charrefLen;
2575
2576
0
      cur = xmlGetUTF8Char(content, &len);
2577
0
      if (cur <= 0)
2578
0
                break;
2579
2580
#ifdef DEBUG_ENCODING
2581
            xmlGenericError(xmlGenericErrorContext,
2582
                    "handling output conversion error\n");
2583
            xmlGenericError(xmlGenericErrorContext,
2584
                    "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2585
                    content[0], content[1],
2586
                    content[2], content[3]);
2587
#endif
2588
            /*
2589
             * Removes the UTF8 sequence, and replace it by a charref
2590
             * and continue the transcoding phase, hoping the error
2591
             * did not mangle the encoder state.
2592
             */
2593
0
            charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2594
0
                             "&#%d;", cur);
2595
0
            xmlBufShrink(in, len);
2596
0
            xmlBufGrow(out, charrefLen * 4);
2597
0
            c_out = xmlBufAvail(out);
2598
0
            c_in = charrefLen;
2599
0
            ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2600
0
                                    charref, &c_in);
2601
2602
0
      if ((ret < 0) || (c_in != charrefLen)) {
2603
0
    char buf[50];
2604
2605
0
    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2606
0
       content[0], content[1],
2607
0
       content[2], content[3]);
2608
0
    buf[49] = 0;
2609
0
    xmlEncodingErr(XML_I18N_CONV_FAILED,
2610
0
        "output conversion failed due to conv error, bytes %s\n",
2611
0
             buf);
2612
0
    if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE)
2613
0
        content[0] = ' ';
2614
0
                break;
2615
0
      }
2616
2617
0
            xmlBufAddLen(out, c_out);
2618
0
            writtentot += c_out;
2619
0
            goto retry;
2620
0
  }
2621
0
    }
2622
0
    return(writtentot ? writtentot : ret);
2623
0
}
2624
#endif
2625
2626
/**
2627
 * xmlCharEncOutFunc:
2628
 * @handler:  char encoding transformation data structure
2629
 * @out:  an xmlBuffer for the output.
2630
 * @in:  an xmlBuffer for the input
2631
 *
2632
 * Generic front-end for the encoding handler output function
2633
 * a first call with @in == NULL has to be made firs to initiate the
2634
 * output in case of non-stateless encoding needing to initiate their
2635
 * state or the output (like the BOM in UTF16).
2636
 * In case of UTF8 sequence conversion errors for the given encoder,
2637
 * the content will be automatically remapped to a CharRef sequence.
2638
 *
2639
 * Returns the number of byte written if success, or
2640
 *     -1 general error
2641
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2642
 *        the result of transformation can't fit into the encoding we want), or
2643
 */
2644
int
2645
xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2646
0
                  xmlBufferPtr in) {
2647
0
    int ret;
2648
0
    int written;
2649
0
    int writtentot = 0;
2650
0
    int toconv;
2651
2652
0
    if (handler == NULL) return(-1);
2653
0
    if (out == NULL) return(-1);
2654
2655
0
retry:
2656
2657
0
    written = out->size - out->use;
2658
2659
0
    if (written > 0)
2660
0
  written--; /* Gennady: count '/0' */
2661
2662
    /*
2663
     * First specific handling of in = NULL, i.e. the initialization call
2664
     */
2665
0
    if (in == NULL) {
2666
0
        toconv = 0;
2667
        /* TODO: Check return value. */
2668
0
        xmlEncOutputChunk(handler, &out->content[out->use], &written,
2669
0
                          NULL, &toconv);
2670
0
        out->use += written;
2671
0
        out->content[out->use] = 0;
2672
#ifdef DEBUG_ENCODING
2673
  xmlGenericError(xmlGenericErrorContext,
2674
    "initialized encoder\n");
2675
#endif
2676
0
        return(0);
2677
0
    }
2678
2679
    /*
2680
     * Conversion itself.
2681
     */
2682
0
    toconv = in->use;
2683
0
    if (toconv == 0)
2684
0
  return(0);
2685
0
    if (toconv * 4 >= written) {
2686
0
        xmlBufferGrow(out, toconv * 4);
2687
0
  written = out->size - out->use - 1;
2688
0
    }
2689
0
    ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2690
0
                            in->content, &toconv);
2691
0
    xmlBufferShrink(in, toconv);
2692
0
    out->use += written;
2693
0
    writtentot += written;
2694
0
    out->content[out->use] = 0;
2695
0
    if (ret == -1) {
2696
0
        if (written > 0) {
2697
            /* Can be a limitation of iconv or uconv */
2698
0
            goto retry;
2699
0
        }
2700
0
        ret = -3;
2701
0
    }
2702
2703
    /*
2704
     * Attempt to handle error cases
2705
     */
2706
0
    switch (ret) {
2707
0
        case 0:
2708
#ifdef DEBUG_ENCODING
2709
      xmlGenericError(xmlGenericErrorContext,
2710
        "converted %d bytes to %d bytes of output\n",
2711
              toconv, written);
2712
#endif
2713
0
      break;
2714
0
        case -1:
2715
#ifdef DEBUG_ENCODING
2716
      xmlGenericError(xmlGenericErrorContext,
2717
        "output conversion failed by lack of space\n");
2718
#endif
2719
0
      break;
2720
0
        case -3:
2721
#ifdef DEBUG_ENCODING
2722
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2723
              toconv, written, in->use);
2724
#endif
2725
0
      break;
2726
0
        case -4:
2727
0
      xmlEncodingErr(XML_I18N_NO_OUTPUT,
2728
0
               "xmlCharEncOutFunc: no output function !\n", NULL);
2729
0
      ret = -1;
2730
0
            break;
2731
0
        case -2: {
2732
0
      xmlChar charref[20];
2733
0
      int len = in->use;
2734
0
      const xmlChar *utf = (const xmlChar *) in->content;
2735
0
      int cur, charrefLen;
2736
2737
0
      cur = xmlGetUTF8Char(utf, &len);
2738
0
      if (cur <= 0)
2739
0
                break;
2740
2741
#ifdef DEBUG_ENCODING
2742
            xmlGenericError(xmlGenericErrorContext,
2743
                    "handling output conversion error\n");
2744
            xmlGenericError(xmlGenericErrorContext,
2745
                    "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2746
                    in->content[0], in->content[1],
2747
                    in->content[2], in->content[3]);
2748
#endif
2749
            /*
2750
             * Removes the UTF8 sequence, and replace it by a charref
2751
             * and continue the transcoding phase, hoping the error
2752
             * did not mangle the encoder state.
2753
             */
2754
0
            charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2755
0
                             "&#%d;", cur);
2756
0
            xmlBufferShrink(in, len);
2757
0
            xmlBufferGrow(out, charrefLen * 4);
2758
0
      written = out->size - out->use - 1;
2759
0
            toconv = charrefLen;
2760
0
            ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2761
0
                                    charref, &toconv);
2762
2763
0
      if ((ret < 0) || (toconv != charrefLen)) {
2764
0
    char buf[50];
2765
2766
0
    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2767
0
       in->content[0], in->content[1],
2768
0
       in->content[2], in->content[3]);
2769
0
    buf[49] = 0;
2770
0
    xmlEncodingErr(XML_I18N_CONV_FAILED,
2771
0
        "output conversion failed due to conv error, bytes %s\n",
2772
0
             buf);
2773
0
    if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2774
0
        in->content[0] = ' ';
2775
0
          break;
2776
0
      }
2777
2778
0
            out->use += written;
2779
0
            writtentot += written;
2780
0
            out->content[out->use] = 0;
2781
0
            goto retry;
2782
0
  }
2783
0
    }
2784
0
    return(writtentot ? writtentot : ret);
2785
0
}
2786
2787
/**
2788
 * xmlCharEncCloseFunc:
2789
 * @handler:  char encoding transformation data structure
2790
 *
2791
 * Generic front-end for encoding handler close function
2792
 *
2793
 * Returns 0 if success, or -1 in case of error
2794
 */
2795
int
2796
3.83k
xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2797
3.83k
    int ret = 0;
2798
3.83k
    int tofree = 0;
2799
3.83k
    int i, handler_in_list = 0;
2800
2801
    /* Avoid unused variable warning if features are disabled. */
2802
3.83k
    (void) handler_in_list;
2803
2804
3.83k
    if (handler == NULL) return(-1);
2805
3.83k
    if (handler->name == NULL) return(-1);
2806
3.83k
    if (handlers != NULL) {
2807
28.6k
        for (i = 0;i < nbCharEncodingHandler; i++) {
2808
25.8k
            if (handler == handlers[i]) {
2809
1.07k
          handler_in_list = 1;
2810
1.07k
    break;
2811
1.07k
      }
2812
25.8k
  }
2813
3.83k
    }
2814
3.83k
#ifdef LIBXML_ICONV_ENABLED
2815
    /*
2816
     * Iconv handlers can be used only once, free the whole block.
2817
     * and the associated icon resources.
2818
     */
2819
3.83k
    if ((handler_in_list == 0) &&
2820
3.83k
        ((handler->iconv_out != NULL) || (handler->iconv_in != NULL))) {
2821
2.75k
        tofree = 1;
2822
2.75k
  if (handler->iconv_out != NULL) {
2823
2.75k
      if (iconv_close(handler->iconv_out))
2824
0
    ret = -1;
2825
2.75k
      handler->iconv_out = NULL;
2826
2.75k
  }
2827
2.75k
  if (handler->iconv_in != NULL) {
2828
2.75k
      if (iconv_close(handler->iconv_in))
2829
0
    ret = -1;
2830
2.75k
      handler->iconv_in = NULL;
2831
2.75k
  }
2832
2.75k
    }
2833
3.83k
#endif /* LIBXML_ICONV_ENABLED */
2834
#ifdef LIBXML_ICU_ENABLED
2835
    if ((handler_in_list == 0) &&
2836
        ((handler->uconv_out != NULL) || (handler->uconv_in != NULL))) {
2837
        tofree = 1;
2838
  if (handler->uconv_out != NULL) {
2839
      closeIcuConverter(handler->uconv_out);
2840
      handler->uconv_out = NULL;
2841
  }
2842
  if (handler->uconv_in != NULL) {
2843
      closeIcuConverter(handler->uconv_in);
2844
      handler->uconv_in = NULL;
2845
  }
2846
    }
2847
#endif
2848
3.83k
    if (tofree) {
2849
        /* free up only dynamic handlers iconv/uconv */
2850
2.75k
        if (handler->name != NULL)
2851
2.75k
            xmlFree(handler->name);
2852
2.75k
        handler->name = NULL;
2853
2.75k
        xmlFree(handler);
2854
2.75k
    }
2855
#ifdef DEBUG_ENCODING
2856
    if (ret)
2857
        xmlGenericError(xmlGenericErrorContext,
2858
    "failed to close the encoding handler\n");
2859
    else
2860
        xmlGenericError(xmlGenericErrorContext,
2861
    "closed the encoding handler\n");
2862
#endif
2863
2864
3.83k
    return(ret);
2865
3.83k
}
2866
2867
/**
2868
 * xmlByteConsumed:
2869
 * @ctxt: an XML parser context
2870
 *
2871
 * This function provides the current index of the parser relative
2872
 * to the start of the current entity. This function is computed in
2873
 * bytes from the beginning starting at zero and finishing at the
2874
 * size in byte of the file if parsing a file. The function is
2875
 * of constant cost if the input is UTF-8 but can be costly if run
2876
 * on non-UTF-8 input.
2877
 *
2878
 * Returns the index in bytes from the beginning of the entity or -1
2879
 *         in case the index could not be computed.
2880
 */
2881
long
2882
0
xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2883
0
    xmlParserInputPtr in;
2884
2885
0
    if (ctxt == NULL) return(-1);
2886
0
    in = ctxt->input;
2887
0
    if (in == NULL)  return(-1);
2888
0
    if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2889
0
        unsigned int unused = 0;
2890
0
  xmlCharEncodingHandler * handler = in->buf->encoder;
2891
        /*
2892
   * Encoding conversion, compute the number of unused original
2893
   * bytes from the input not consumed and subtract that from
2894
   * the raw consumed value, this is not a cheap operation
2895
   */
2896
0
        if (in->end - in->cur > 0) {
2897
0
      unsigned char convbuf[32000];
2898
0
      const unsigned char *cur = (const unsigned char *)in->cur;
2899
0
      int toconv = in->end - in->cur, written = 32000;
2900
2901
0
      int ret;
2902
2903
0
            do {
2904
0
                toconv = in->end - cur;
2905
0
                written = 32000;
2906
0
                ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2907
0
                                        cur, &toconv);
2908
0
                if (ret < 0) {
2909
0
                    if (written > 0)
2910
0
                        ret = -2;
2911
0
                    else
2912
0
                        return(-1);
2913
0
                }
2914
0
                unused += written;
2915
0
                cur += toconv;
2916
0
            } while (ret == -2);
2917
0
  }
2918
0
  if (in->buf->rawconsumed < unused)
2919
0
      return(-1);
2920
0
  return(in->buf->rawconsumed - unused);
2921
0
    }
2922
0
    return(in->consumed + (in->cur - in->base));
2923
0
}
2924
2925
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2926
#ifdef LIBXML_ISO8859X_ENABLED
2927
2928
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. Every converted character yields exactly one
 * output byte; the conversion stops, without error, as soon as @outlen
 * bytes have been stored.
 *
 * Returns the number of bytes written to @out if success (0 for the
 *     initialization call made with @in == NULL),
 *     -2 if the transcoding fails (malformed UTF-8 or character not
 *        present in the target character set),
 *     -3 if the input ends in the middle of a multi-byte sequence, or
 *     -1 if @out, @outlen, @inlen or @xlattable is NULL.
 * The value of @inlen after return is the number of octets consumed,
 *     counting only the characters that were completely converted.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              const unsigned char* const xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;
    int ret = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
        * initialization nothing to do
        */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    outend = out + (*outlen);
    inend = in + (*inlen);
    /* One output byte per character: checking room once per turn is enough. */
    while ((in < inend) && (out < outend)) {
        unsigned char d = *in++;
        if  (d < 0x80)  {
            *out++ = d;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            break;
        } else if (d < 0xE0) {
            unsigned char c;
            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                ret = -3;
                break;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                ret = -2;
                break;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            /* first 32 index bytes select the 64-entry block of the lead byte */
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;
            if (!(in < inend - 1)) {
                /* trailing bytes not in input buffer */
                ret = -3;
                break;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                ret = -2;
                break;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                ret = -2;
                break;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            /* index bytes 32..47 start a two step walk through the blocks */
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                        xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            ret = -2;
            break;
        }
        processed = in;
    }
    /* Report progress up to the last completely converted character. */
    *outlen = out - outstart;
    *inlen = processed - instart;
    return((ret < 0) ? ret : *outlen);
}
3046
3047
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable:  the code points for the bytes 0x80..0xFF, indexed by
 *                 (byte - 0x80); a 0 entry marks an undefined byte
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out.
 * Returns the number of bytes written to @out if success, or -1 if an
 * argument is NULL or an undefined code point is met
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* instop;
    unsigned int c;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    instop = inend;

    /*
     * Main loop: requires room for a 3 byte sequence. The room is
     * computed as a difference so that no pointer before @out is formed
     * when the output buffer is shorter than 2 bytes.
     */
    while ((in < inend) && (outend - out > 2)) {
        if (*in >= 0x80) {
            c = unicodetable [*in - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
            ++in;
        }
        /* Never copy more ASCII bytes than the output can still hold. */
        if (instop - in > outend - out) instop = in + (outend - out);
        /* Bound check first: *in must not be read once in == instop. */
        while ((in < instop) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }
    /* Up to two trailing ASCII bytes still fit in the last 2 output bytes. */
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ =  *in++;
    }
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ =  *in++;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (*outlen);
}
3112
3113
3114
/************************************************************************
3115
 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
3116
 ************************************************************************/
3117
3118
/*
 * Unicode code points for the upper half (bytes 0x80..0xFF) of
 * ISO-8859-2, one entry per byte in ascending order (128 entries).
 * NOTE(review): presumably handed to ISO8859xToUTF8 as @unicodetable,
 * which indexes it with (byte - 0x80) and treats a 0x0000 entry as an
 * undefined code point -- confirm against the ISO-8859-2 wrapper.
 */
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
3119
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3120
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3121
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3122
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3123
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
3124
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
3125
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
3126
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
3127
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
3128
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
3129
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
3130
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
3131
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
3132
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
3133
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
3134
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
3135
};
3136
3137
/*
 * Two-level UTF-8 -> ISO-8859-2 lookup table, written as a string
 * literal that fills the array exactly (27 rows of 16 bytes =
 * 48 + 6 * 64), so no terminating 0 is stored.
 * NOTE(review): the layout matches what UTF8ToISO8859x reads from
 * @xlattable (48 index bytes followed by 64-byte blocks, a 0 result
 * meaning "not in character set"); presumably passed there by the
 * ISO-8859-2 wrapper -- confirm.
 */
static const unsigned char xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
3138
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3139
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3140
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3141
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3142
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3143
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3144
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3145
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3146
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3147
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3148
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3149
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
3150
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
3151
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3152
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
3153
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3154
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
3155
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3156
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3157
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
3158
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
3159
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
3160
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
3161
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3162
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
3163
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
3164
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
3165
};
3166
3167
/*
 * Unicode code points for the upper half (bytes 0x80..0xFF) of
 * ISO-8859-3, one entry per byte in ascending order (128 entries).
 * Several entries are 0x0000.
 * NOTE(review): presumably handed to ISO8859xToUTF8 as @unicodetable,
 * which indexes it with (byte - 0x80) and treats a 0x0000 entry as an
 * undefined code point -- confirm against the ISO-8859-3 wrapper.
 */
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
3168
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3169
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3170
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3171
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3172
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
3173
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
3174
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
3175
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
3176
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
3177
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3178
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
3179
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
3180
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
3181
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3182
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
3183
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
3184
};
3185
3186
/*
 * Two-level UTF-8 -> ISO-8859-3 lookup table, written as a string
 * literal that fills the array exactly (31 rows of 16 bytes =
 * 48 + 7 * 64), so no terminating 0 is stored.
 * NOTE(review): the layout matches what UTF8ToISO8859x reads from
 * @xlattable (48 index bytes followed by 64-byte blocks, a 0 result
 * meaning "not in character set"); presumably passed there by the
 * ISO-8859-3 wrapper -- confirm.
 */
static const unsigned char xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
3187
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3188
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3189
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3190
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3191
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3192
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3193
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3194
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3195
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3196
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3197
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
3198
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
3199
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
3200
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
3201
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3202
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3203
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
3204
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3205
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3206
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3207
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3208
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3209
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3210
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3211
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
3212
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
3213
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
3214
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3215
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3216
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3217
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
3218
};
3219
3220
/*
 * Unicode code points for the upper half (bytes 0x80..0xFF) of
 * ISO-8859-4, one entry per byte in ascending order (128 entries).
 * NOTE(review): presumably handed to ISO8859xToUTF8 as @unicodetable,
 * which indexes it with (byte - 0x80) and treats a 0x0000 entry as an
 * undefined code point -- confirm against the ISO-8859-4 wrapper.
 */
static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
3221
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3222
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3223
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3224
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3225
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
3226
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
3227
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
3228
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
3229
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3230
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
3231
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3232
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
3233
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3234
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
3235
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3236
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
3237
};
3238
3239
/*
 * Two-level UTF-8 -> ISO-8859-4 lookup table, written as a string
 * literal that fills the array exactly (27 rows of 16 bytes =
 * 48 + 6 * 64), so no terminating 0 is stored.
 * NOTE(review): the layout matches what UTF8ToISO8859x reads from
 * @xlattable (48 index bytes followed by 64-byte blocks, a 0 result
 * meaning "not in character set"); presumably passed there by the
 * ISO-8859-4 wrapper -- confirm.
 */
static const unsigned char xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
3240
    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
3241
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3242
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3243
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3244
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3245
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3246
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3247
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3248
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3249
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
3250
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3251
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3252
    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3253
    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
3254
    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
3255
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
3256
    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
3257
    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
3258
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
3259
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3260
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
3261
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3262
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3263
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3264
    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
3265
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
3266
    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
3267
};
3268
3269
/*
 * Unicode code points for the upper half (bytes 0x80..0xFF) of
 * ISO-8859-5, one entry per byte in ascending order (128 entries).
 * NOTE(review): presumably handed to ISO8859xToUTF8 as @unicodetable,
 * which indexes it with (byte - 0x80) and treats a 0x0000 entry as an
 * undefined code point -- confirm against the ISO-8859-5 wrapper.
 */
static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
3270
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3271
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3272
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3273
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3274
    0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
3275
    0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
3276
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
3277
    0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
3278
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
3279
    0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
3280
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
3281
    0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
3282
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
3283
    0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
3284
    0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
3285
    0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
3286
};
3287
3288
/*
 * Two-level UTF-8 -> ISO-8859-5 lookup table, written as a string
 * literal that fills the array exactly (27 rows of 16 bytes =
 * 48 + 6 * 64), so no terminating 0 is stored.
 * NOTE(review): the layout matches what UTF8ToISO8859x reads from
 * @xlattable (48 index bytes followed by 64-byte blocks, a 0 result
 * meaning "not in character set"); presumably passed there by the
 * ISO-8859-5 wrapper -- confirm.
 */
static const unsigned char xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
3289
    "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3290
    "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3291
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3292
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3293
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3294
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3295
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3296
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3297
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3298
    "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
3299
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3300
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
3301
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3302
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3303
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3304
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3305
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
3306
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3307
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3308
    "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3309
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3310
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3311
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3312
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3313
    "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3314
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3315
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3316
};
3317
3318
static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
3319
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3320
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3321
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3322
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3323
    0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
3324
    0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
3325
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3326
    0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
3327
    0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
3328
    0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3329
    0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3330
    0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3331
    0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3332
    0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3333
    0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3334
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3335
};
3336
3337
static const unsigned char xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
3338
    "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3339
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3340
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3341
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3342
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3343
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3344
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3345
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3346
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3347
    "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3348
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3349
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3350
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3351
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3352
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3353
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3354
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3355
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3356
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3357
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3358
    "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3359
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3360
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3361
};
3362
3363
static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
3364
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3365
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3366
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3367
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3368
    0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3369
    0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3370
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3371
    0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3372
    0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3373
    0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3374
    0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3375
    0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3376
    0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3377
    0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3378
    0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3379
    0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3380
};
3381
3382
static const unsigned char xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3383
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3384
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3385
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3386
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3387
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3388
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3389
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3390
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3391
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3392
    "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3393
    "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3394
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3395
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3396
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3397
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3398
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3399
    "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3400
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3401
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3402
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3403
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3404
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3405
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3406
    "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3407
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3408
    "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3409
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3410
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3411
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3412
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3413
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3414
};
3415
3416
static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
3417
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3418
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3419
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3420
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3421
    0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3422
    0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3423
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3424
    0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3425
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3426
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3427
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3428
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3429
    0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3430
    0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3431
    0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3432
    0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3433
};
3434
3435
static const unsigned char xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3436
    "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3437
    "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3438
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3439
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3440
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3441
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3442
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3443
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3444
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3445
    "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3446
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3447
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3448
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3449
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3450
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3451
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3452
    "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3453
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3454
    "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3455
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3456
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3457
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3458
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3459
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3460
    "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3461
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3462
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3463
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3464
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3465
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3466
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3467
};
3468
3469
static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
3470
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3471
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3472
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3473
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3474
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3475
    0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3476
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3477
    0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3478
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3479
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3480
    0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3481
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3482
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3483
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3484
    0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3485
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3486
};
3487
3488
static const unsigned char xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3489
    "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3490
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3491
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3492
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3493
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3494
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3495
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3496
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3497
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3498
    "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3499
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3500
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3501
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3502
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3503
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3504
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3505
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3506
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3507
    "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3508
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3509
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3510
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3511
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3512
};
3513
3514
static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3515
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3516
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3517
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3518
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3519
    0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3520
    0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3521
    0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3522
    0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3523
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3524
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3525
    0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3526
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3527
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3528
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3529
    0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3530
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3531
};
3532
3533
static const unsigned char xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3534
    "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3535
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3536
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3537
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3538
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3539
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3540
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3541
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3542
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3543
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3544
    "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3545
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3546
    "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3547
    "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3548
    "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3549
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3550
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3551
    "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3552
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3553
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3554
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3555
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3556
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3557
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3558
    "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3559
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3560
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3561
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3562
    "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3563
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3564
    "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3565
};
3566
3567
static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3568
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3569
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3570
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3571
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3572
    0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3573
    0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3574
    0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3575
    0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3576
    0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3577
    0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3578
    0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3579
    0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3580
    0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3581
    0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3582
    0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3583
    0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3584
};
3585
3586
static const unsigned char xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3587
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3588
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3589
    "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3590
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3591
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3592
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3593
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3594
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3595
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3596
    "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3597
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3598
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3599
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3600
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3601
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3602
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3603
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3604
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3605
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3606
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3607
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3608
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3609
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3610
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3611
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3612
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3613
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3614
};
3615
3616
static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3617
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3618
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3619
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3620
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3621
    0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3622
    0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3623
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3624
    0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3625
    0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3626
    0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3627
    0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3628
    0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3629
    0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3630
    0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3631
    0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3632
    0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3633
};
3634
3635
static const unsigned char xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3636
    "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3637
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3638
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3639
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3640
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3641
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3642
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3643
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3644
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3645
    "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3646
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3647
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3648
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3649
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3650
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3651
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3652
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3653
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3654
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3655
    "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3656
    "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3657
    "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3658
    "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3659
    "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3660
    "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3661
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3662
    "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3663
    "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3664
    "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3665
    "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3666
    "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3667
};
3668
3669
static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
3670
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3671
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3672
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3673
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3674
    0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3675
    0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3676
    0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3677
    0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3678
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3679
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3680
    0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3681
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3682
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3683
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3684
    0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3685
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
3686
};
3687
3688
static const unsigned char xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
3689
    "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3690
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3691
    "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3692
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3693
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3694
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3695
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3696
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3697
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3698
    "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3699
    "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3700
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3701
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3702
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3703
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3704
    "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3705
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3706
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3707
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3708
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3709
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3710
    "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3711
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3712
    "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3713
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3714
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3715
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3716
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3717
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3718
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3719
    "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3720
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3721
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3722
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3723
    "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3724
    "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3725
    "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3726
    "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3727
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3728
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3729
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3730
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3731
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3732
};
3733
3734
static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
3735
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3736
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3737
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3738
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3739
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3740
    0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3741
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3742
    0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3743
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3744
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3745
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3746
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3747
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3748
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3749
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3750
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
3751
};
3752
3753
static const unsigned char xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
3754
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3755
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3756
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3757
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3758
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3759
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3760
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3761
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3762
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3763
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3764
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3765
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3766
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3767
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3768
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3769
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3770
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3771
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3772
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3773
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3774
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3775
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3776
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3777
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3778
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3779
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3780
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
3781
};
3782
3783
/*
 * xmlunicodetable_ISO8859_16:
 *
 * 128-entry table of 16-bit values handed to ISO8859xToUTF8 by
 * ISO8859_16ToUTF8. The first 32 entries are the identity values
 * 0x0080..0x009f; the remaining 96 mix Latin-1 range values with values
 * above 0x00ff (e.g. 0x20ac, 0x0218).
 * NOTE(review): presumably entry i is the Unicode code point for input
 * byte 0x80 + i -- the indexing is done in ISO8859xToUTF8, outside this
 * excerpt; confirm there.
 */
static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
3784
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3785
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3786
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3787
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3788
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3789
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3790
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3791
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3792
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3793
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3794
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3795
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3796
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3797
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3798
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3799
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
3800
};
3801
3802
/*
 * xmltranscodetable_ISO8859_16:
 *
 * Byte table of 48 + 9 * 64 = 624 entries handed to UTF8ToISO8859x by
 * UTF8ToISO8859_16. The initializer below is 39 rows of 16 bytes, i.e.
 * exactly 624 bytes, so the array is completely filled and the string
 * literal's terminating NUL is not stored.
 * NOTE(review): the size expression suggests a 48-byte leading section
 * followed by nine 64-byte blocks, with small values (0x01..0x08) acting as
 * block selectors and 0x00 meaning "no mapping" -- the lookup itself is in
 * UTF8ToISO8859x, outside this excerpt; confirm there before relying on it.
 */
static const unsigned char xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
3803
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3804
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3805
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3806
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3807
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3808
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3809
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3810
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3811
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3812
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3813
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3814
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3815
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3816
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3817
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3818
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3819
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3820
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3821
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3822
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3823
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3824
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3825
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3826
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3827
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3828
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3829
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3830
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3831
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3832
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3833
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3834
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3835
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3836
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3837
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3838
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3839
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3840
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3841
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3842
};
3843
3844
3845
/*
3846
 * Auto-generated conversion wrappers for ISO-8859-2 .. ISO-8859-16
 * (no ISO-8859-12 wrapper is defined).
3847
 */
3848
3849
/*
 * ISO8859_2ToUTF8:
 *
 * Forwards out, outlen, in and inlen unchanged to ISO8859xToUTF8, adding
 * xmlunicodetable_ISO8859_2 as the table, and returns that call's result.
 * Passed as the second argument of the "ISO-8859-2" registration in
 * xmlRegisterCharEncodingHandlersISO8859x.
 * NOTE(review): buffer/length semantics and return codes are defined by
 * ISO8859xToUTF8, which is outside this excerpt.
 */
static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3850
    const unsigned char* in, int *inlen) {
3851
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3852
}
3853
/*
 * UTF8ToISO8859_2:
 *
 * Forwards out, outlen, in and inlen unchanged to UTF8ToISO8859x, adding
 * xmltranscodetable_ISO8859_2 as the table, and returns that call's result.
 * Passed as the third argument of the "ISO-8859-2" registration in
 * xmlRegisterCharEncodingHandlersISO8859x.
 * NOTE(review): buffer/length semantics and return codes are defined by
 * UTF8ToISO8859x, which is outside this excerpt.
 */
static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3854
    const unsigned char* in, int *inlen) {
3855
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3856
}
3857
3858
/*
 * ISO8859_3ToUTF8:
 *
 * Forwards out, outlen, in and inlen unchanged to ISO8859xToUTF8, adding
 * xmlunicodetable_ISO8859_3 as the table, and returns that call's result.
 * Passed as the second argument of the "ISO-8859-3" registration in
 * xmlRegisterCharEncodingHandlersISO8859x.
 * NOTE(review): buffer/length semantics and return codes are defined by
 * ISO8859xToUTF8, which is outside this excerpt.
 */
static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3859
    const unsigned char* in, int *inlen) {
3860
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3861
}
3862
/*
 * UTF8ToISO8859_3:
 *
 * Forwards out, outlen, in and inlen unchanged to UTF8ToISO8859x, adding
 * xmltranscodetable_ISO8859_3 as the table, and returns that call's result.
 * Passed as the third argument of the "ISO-8859-3" registration in
 * xmlRegisterCharEncodingHandlersISO8859x.
 * NOTE(review): buffer/length semantics and return codes are defined by
 * UTF8ToISO8859x, which is outside this excerpt.
 */
static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3863
    const unsigned char* in, int *inlen) {
3864
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3865
}
3866
3867
/*
 * ISO8859_4ToUTF8:
 *
 * Forwards out, outlen, in and inlen unchanged to ISO8859xToUTF8, adding
 * xmlunicodetable_ISO8859_4 as the table, and returns that call's result.
 * Passed as the second argument of the "ISO-8859-4" registration in
 * xmlRegisterCharEncodingHandlersISO8859x.
 * NOTE(review): buffer/length semantics and return codes are defined by
 * ISO8859xToUTF8, which is outside this excerpt.
 */
static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3868
    const unsigned char* in, int *inlen) {
3869
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3870
}
3871
/*
 * UTF8ToISO8859_4:
 *
 * Forwards out, outlen, in and inlen unchanged to UTF8ToISO8859x, adding
 * xmltranscodetable_ISO8859_4 as the table, and returns that call's result.
 * Passed as the third argument of the "ISO-8859-4" registration in
 * xmlRegisterCharEncodingHandlersISO8859x.
 * NOTE(review): buffer/length semantics and return codes are defined by
 * UTF8ToISO8859x, which is outside this excerpt.
 */
static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3872
    const unsigned char* in, int *inlen) {
3873
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3874
}
3875
3876
/*
 * ISO8859_5ToUTF8:
 *
 * Forwards out, outlen, in and inlen unchanged to ISO8859xToUTF8, adding
 * xmlunicodetable_ISO8859_5 as the table, and returns that call's result.
 * Passed as the second argument of the "ISO-8859-5" registration in
 * xmlRegisterCharEncodingHandlersISO8859x.
 * NOTE(review): buffer/length semantics and return codes are defined by
 * ISO8859xToUTF8, which is outside this excerpt.
 */
static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3877
    const unsigned char* in, int *inlen) {
3878
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3879
}
3880
/*
 * UTF8ToISO8859_5:
 *
 * Forwards out, outlen, in and inlen unchanged to UTF8ToISO8859x, adding
 * xmltranscodetable_ISO8859_5 as the table, and returns that call's result.
 * Passed as the third argument of the "ISO-8859-5" registration in
 * xmlRegisterCharEncodingHandlersISO8859x.
 * NOTE(review): buffer/length semantics and return codes are defined by
 * UTF8ToISO8859x, which is outside this excerpt.
 */
static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3881
    const unsigned char* in, int *inlen) {
3882
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3883
}
3884
3885
/*
 * ISO8859_6ToUTF8:
 *
 * Forwards out, outlen, in and inlen unchanged to ISO8859xToUTF8, adding
 * xmlunicodetable_ISO8859_6 as the table, and returns that call's result.
 * Passed as the second argument of the "ISO-8859-6" registration in
 * xmlRegisterCharEncodingHandlersISO8859x.
 * NOTE(review): buffer/length semantics and return codes are defined by
 * ISO8859xToUTF8, which is outside this excerpt.
 */
static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3886
    const unsigned char* in, int *inlen) {
3887
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3888
}
3889
/*
 * UTF8ToISO8859_6:
 *
 * Forwards out, outlen, in and inlen unchanged to UTF8ToISO8859x, adding
 * xmltranscodetable_ISO8859_6 as the table, and returns that call's result.
 * Passed as the third argument of the "ISO-8859-6" registration in
 * xmlRegisterCharEncodingHandlersISO8859x.
 * NOTE(review): buffer/length semantics and return codes are defined by
 * UTF8ToISO8859x, which is outside this excerpt.
 */
static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3890
    const unsigned char* in, int *inlen) {
3891
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3892
}
3893
3894
/*
 * ISO8859_7ToUTF8:
 *
 * Forwards out, outlen, in and inlen unchanged to ISO8859xToUTF8, adding
 * xmlunicodetable_ISO8859_7 as the table, and returns that call's result.
 * Passed as the second argument of the "ISO-8859-7" registration in
 * xmlRegisterCharEncodingHandlersISO8859x.
 * NOTE(review): buffer/length semantics and return codes are defined by
 * ISO8859xToUTF8, which is outside this excerpt.
 */
static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3895
    const unsigned char* in, int *inlen) {
3896
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3897
}
3898
/*
 * UTF8ToISO8859_7:
 *
 * Forwards out, outlen, in and inlen unchanged to UTF8ToISO8859x, adding
 * xmltranscodetable_ISO8859_7 as the table, and returns that call's result.
 * Passed as the third argument of the "ISO-8859-7" registration in
 * xmlRegisterCharEncodingHandlersISO8859x.
 * NOTE(review): buffer/length semantics and return codes are defined by
 * UTF8ToISO8859x, which is outside this excerpt.
 */
static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3899
    const unsigned char* in, int *inlen) {
3900
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3901
}
3902
3903
/*
 * ISO8859_8ToUTF8:
 *
 * Forwards out, outlen, in and inlen unchanged to ISO8859xToUTF8, adding
 * xmlunicodetable_ISO8859_8 as the table, and returns that call's result.
 * Passed as the second argument of the "ISO-8859-8" registration in
 * xmlRegisterCharEncodingHandlersISO8859x.
 * NOTE(review): buffer/length semantics and return codes are defined by
 * ISO8859xToUTF8, which is outside this excerpt.
 */
static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3904
    const unsigned char* in, int *inlen) {
3905
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3906
}
3907
/*
 * UTF8ToISO8859_8:
 *
 * Forwards out, outlen, in and inlen unchanged to UTF8ToISO8859x, adding
 * xmltranscodetable_ISO8859_8 as the table, and returns that call's result.
 * Passed as the third argument of the "ISO-8859-8" registration in
 * xmlRegisterCharEncodingHandlersISO8859x.
 * NOTE(review): buffer/length semantics and return codes are defined by
 * UTF8ToISO8859x, which is outside this excerpt.
 */
static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3908
    const unsigned char* in, int *inlen) {
3909
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3910
}
3911
3912
/*
 * ISO8859_9ToUTF8:
 *
 * Forwards out, outlen, in and inlen unchanged to ISO8859xToUTF8, adding
 * xmlunicodetable_ISO8859_9 as the table, and returns that call's result.
 * Passed as the second argument of the "ISO-8859-9" registration in
 * xmlRegisterCharEncodingHandlersISO8859x.
 * NOTE(review): buffer/length semantics and return codes are defined by
 * ISO8859xToUTF8, which is outside this excerpt.
 */
static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3913
    const unsigned char* in, int *inlen) {
3914
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3915
}
3916
/*
 * UTF8ToISO8859_9:
 *
 * Forwards out, outlen, in and inlen unchanged to UTF8ToISO8859x, adding
 * xmltranscodetable_ISO8859_9 as the table, and returns that call's result.
 * Passed as the third argument of the "ISO-8859-9" registration in
 * xmlRegisterCharEncodingHandlersISO8859x.
 * NOTE(review): buffer/length semantics and return codes are defined by
 * UTF8ToISO8859x, which is outside this excerpt.
 */
static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3917
    const unsigned char* in, int *inlen) {
3918
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3919
}
3920
3921
/*
 * ISO8859_10ToUTF8:
 *
 * Forwards out, outlen, in and inlen unchanged to ISO8859xToUTF8, adding
 * xmlunicodetable_ISO8859_10 as the table, and returns that call's result.
 * Passed as the second argument of the "ISO-8859-10" registration in
 * xmlRegisterCharEncodingHandlersISO8859x.
 * NOTE(review): buffer/length semantics and return codes are defined by
 * ISO8859xToUTF8, which is outside this excerpt.
 */
static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3922
    const unsigned char* in, int *inlen) {
3923
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3924
}
3925
/*
 * UTF8ToISO8859_10:
 *
 * Forwards out, outlen, in and inlen unchanged to UTF8ToISO8859x, adding
 * xmltranscodetable_ISO8859_10 as the table, and returns that call's result.
 * Passed as the third argument of the "ISO-8859-10" registration in
 * xmlRegisterCharEncodingHandlersISO8859x.
 * NOTE(review): buffer/length semantics and return codes are defined by
 * UTF8ToISO8859x, which is outside this excerpt.
 */
static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3926
    const unsigned char* in, int *inlen) {
3927
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3928
}
3929
3930
/*
 * ISO8859_11ToUTF8:
 *
 * Forwards out, outlen, in and inlen unchanged to ISO8859xToUTF8, adding
 * xmlunicodetable_ISO8859_11 as the table, and returns that call's result.
 * Passed as the second argument of the "ISO-8859-11" registration in
 * xmlRegisterCharEncodingHandlersISO8859x.
 * NOTE(review): buffer/length semantics and return codes are defined by
 * ISO8859xToUTF8, which is outside this excerpt.
 */
static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3931
    const unsigned char* in, int *inlen) {
3932
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3933
}
3934
/*
 * UTF8ToISO8859_11:
 *
 * Forwards out, outlen, in and inlen unchanged to UTF8ToISO8859x, adding
 * xmltranscodetable_ISO8859_11 as the table, and returns that call's result.
 * Passed as the third argument of the "ISO-8859-11" registration in
 * xmlRegisterCharEncodingHandlersISO8859x.
 * NOTE(review): buffer/length semantics and return codes are defined by
 * UTF8ToISO8859x, which is outside this excerpt.
 */
static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3935
    const unsigned char* in, int *inlen) {
3936
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3937
}
3938
3939
/*
 * ISO8859_13ToUTF8:
 *
 * Forwards out, outlen, in and inlen unchanged to ISO8859xToUTF8, adding
 * xmlunicodetable_ISO8859_13 as the table, and returns that call's result.
 * Passed as the second argument of the "ISO-8859-13" registration in
 * xmlRegisterCharEncodingHandlersISO8859x.
 * NOTE(review): buffer/length semantics and return codes are defined by
 * ISO8859xToUTF8, which is outside this excerpt.
 */
static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3940
    const unsigned char* in, int *inlen) {
3941
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3942
}
3943
/*
 * UTF8ToISO8859_13:
 *
 * Forwards out, outlen, in and inlen unchanged to UTF8ToISO8859x, adding
 * xmltranscodetable_ISO8859_13 as the table, and returns that call's result.
 * Passed as the third argument of the "ISO-8859-13" registration in
 * xmlRegisterCharEncodingHandlersISO8859x.
 * NOTE(review): buffer/length semantics and return codes are defined by
 * UTF8ToISO8859x, which is outside this excerpt.
 */
static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3944
    const unsigned char* in, int *inlen) {
3945
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3946
}
3947
3948
/*
 * ISO8859_14ToUTF8:
 *
 * Forwards out, outlen, in and inlen unchanged to ISO8859xToUTF8, adding
 * xmlunicodetable_ISO8859_14 as the table, and returns that call's result.
 * Passed as the second argument of the "ISO-8859-14" registration in
 * xmlRegisterCharEncodingHandlersISO8859x.
 * NOTE(review): buffer/length semantics and return codes are defined by
 * ISO8859xToUTF8, which is outside this excerpt.
 */
static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3949
    const unsigned char* in, int *inlen) {
3950
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3951
}
3952
/*
 * UTF8ToISO8859_14:
 *
 * Forwards out, outlen, in and inlen unchanged to UTF8ToISO8859x, adding
 * xmltranscodetable_ISO8859_14 as the table, and returns that call's result.
 * Passed as the third argument of the "ISO-8859-14" registration in
 * xmlRegisterCharEncodingHandlersISO8859x.
 * NOTE(review): buffer/length semantics and return codes are defined by
 * UTF8ToISO8859x, which is outside this excerpt.
 */
static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3953
    const unsigned char* in, int *inlen) {
3954
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3955
}
3956
3957
/*
 * ISO8859_15ToUTF8:
 *
 * Forwards out, outlen, in and inlen unchanged to ISO8859xToUTF8, adding
 * xmlunicodetable_ISO8859_15 as the table, and returns that call's result.
 * Passed as the second argument of the "ISO-8859-15" registration in
 * xmlRegisterCharEncodingHandlersISO8859x.
 * NOTE(review): buffer/length semantics and return codes are defined by
 * ISO8859xToUTF8, which is outside this excerpt.
 */
static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3958
    const unsigned char* in, int *inlen) {
3959
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3960
}
3961
/*
 * UTF8ToISO8859_15:
 *
 * Forwards out, outlen, in and inlen unchanged to UTF8ToISO8859x, adding
 * xmltranscodetable_ISO8859_15 as the table, and returns that call's result.
 * Passed as the third argument of the "ISO-8859-15" registration in
 * xmlRegisterCharEncodingHandlersISO8859x.
 * NOTE(review): buffer/length semantics and return codes are defined by
 * UTF8ToISO8859x, which is outside this excerpt.
 */
static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3962
    const unsigned char* in, int *inlen) {
3963
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3964
}
3965
3966
/*
 * ISO8859_16ToUTF8:
 *
 * Forwards out, outlen, in and inlen unchanged to ISO8859xToUTF8, adding
 * xmlunicodetable_ISO8859_16 as the table, and returns that call's result.
 * Passed as the second argument of the "ISO-8859-16" registration in
 * xmlRegisterCharEncodingHandlersISO8859x.
 * NOTE(review): buffer/length semantics and return codes are defined by
 * ISO8859xToUTF8, which is outside this excerpt.
 */
static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3967
    const unsigned char* in, int *inlen) {
3968
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3969
}
3970
/*
 * UTF8ToISO8859_16:
 *
 * Forwards out, outlen, in and inlen unchanged to UTF8ToISO8859x, adding
 * xmltranscodetable_ISO8859_16 as the table, and returns that call's result.
 * Passed as the third argument of the "ISO-8859-16" registration in
 * xmlRegisterCharEncodingHandlersISO8859x.
 * NOTE(review): buffer/length semantics and return codes are defined by
 * UTF8ToISO8859x, which is outside this excerpt.
 */
static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3971
    const unsigned char* in, int *inlen) {
3972
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3973
}
3974
3975
/*
 * xmlRegisterCharEncodingHandlersISO8859x:
 *
 * Calls xmlNewCharEncodingHandler once per charset name, in ascending order
 * from "ISO-8859-2" to "ISO-8859-16", pairing each name with its
 * ISO8859_NToUTF8 wrapper (second argument) and its UTF8ToISO8859_N wrapper
 * (third argument). There is no call for "ISO-8859-12"; no wrappers for it
 * are defined above. Whatever xmlNewCharEncodingHandler returns is ignored.
 * NOTE(review): presumably xmlNewCharEncodingHandler records the handler in
 * a global registry as a side effect, which is why the result can be
 * dropped -- it is defined outside this excerpt; confirm there.
 */
static void
3976
xmlRegisterCharEncodingHandlersISO8859x (void) {
3977
    xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3978
    xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3979
    xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3980
    xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3981
    xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3982
    xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3983
    xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3984
    xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3985
    xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3986
    xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3987
    xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3988
    xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3989
    xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3990
    xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3991
}
3992
3993
#endif
3994
#endif
3995