Coverage Report

Created: 2022-06-08 06:16

/src/libxml2/encoding.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * encoding.c : implements the encoding conversion functions needed for XML
3
 *
4
 * Related specs:
5
 * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6
 * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7
 * [ISO-10646]    UTF-8 and UTF-16 in Annexes
8
 * [ISO-8859-1]   ISO Latin-1 characters codes.
9
 * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
10
 *                Worldwide Character Encoding -- Version 1.0", Addison-
11
 *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
12
 *                described in Unicode Technical Report #4.
13
 * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
14
 *                Information Interchange, ANSI X3.4-1986.
15
 *
16
 * See Copyright for the status of this software.
17
 *
18
 * daniel@veillard.com
19
 *
20
 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
21
 */
22
23
#define IN_LIBXML
24
#include "libxml.h"
25
26
#include <string.h>
27
#include <limits.h>
28
#include <ctype.h>
29
#include <stdlib.h>
30
31
#ifdef LIBXML_ICONV_ENABLED
32
#include <errno.h>
33
#endif
34
35
#include <libxml/encoding.h>
36
#include <libxml/xmlmemory.h>
37
#ifdef LIBXML_HTML_ENABLED
38
#include <libxml/HTMLparser.h>
39
#endif
40
#include <libxml/globals.h>
41
#include <libxml/xmlerror.h>
42
43
#include "buf.h"
44
#include "enc.h"
45
46
#ifdef LIBXML_ICU_ENABLED
47
#include <unicode/ucnv.h>
48
/* Size of pivot buffer, same as icu/source/common/ucnv.cpp CHUNK_SIZE */
49
#define ICU_PIVOT_BUF_SIZE 1024
50
typedef struct _uconv_t uconv_t;
51
struct _uconv_t {
52
  UConverter *uconv; /* for conversion between an encoding and UTF-16 */
53
  UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */
54
  UChar      pivot_buf[ICU_PIVOT_BUF_SIZE];
55
  UChar      *pivot_source;
56
  UChar      *pivot_target;
57
};
58
#endif
59
60
static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
61
static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
62
63
typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
64
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
65
struct _xmlCharEncodingAlias {
66
    const char *name;
67
    const char *alias;
68
};
69
70
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
71
static int xmlCharEncodingAliasesNb = 0;
72
static int xmlCharEncodingAliasesMax = 0;
73
74
#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
75
#if 0
76
#define DEBUG_ENCODING  /* Define this to get encoding traces */
77
#endif
78
#else
79
#ifdef LIBXML_ISO8859X_ENABLED
80
static void xmlRegisterCharEncodingHandlersISO8859x (void);
81
#endif
82
#endif
83
84
static int xmlLittleEndian = 1;
85
86
/**
87
 * xmlEncodingErrMemory:
88
 * @extra:  extra information
89
 *
90
 * Handle an out of memory condition
91
 */
92
static void
93
xmlEncodingErrMemory(const char *extra)
94
0
{
95
0
    __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
96
0
}
97
98
/**
99
 * xmlErrEncoding:
100
 * @error:  the error number
101
 * @msg:  the error message
102
 *
103
 * n encoding error
104
 */
105
static void LIBXML_ATTR_FORMAT(2,0)
106
xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
107
154k
{
108
154k
    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
109
154k
                    XML_FROM_I18N, error, XML_ERR_FATAL,
110
154k
                    NULL, 0, val, NULL, NULL, 0, 0, msg, val);
111
154k
}
112
113
#ifdef LIBXML_ICU_ENABLED
114
static uconv_t*
115
openIcuConverter(const char* name, int toUnicode)
116
{
117
  UErrorCode status = U_ZERO_ERROR;
118
  uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
119
  if (conv == NULL)
120
    return NULL;
121
122
  conv->pivot_source = conv->pivot_buf;
123
  conv->pivot_target = conv->pivot_buf;
124
125
  conv->uconv = ucnv_open(name, &status);
126
  if (U_FAILURE(status))
127
    goto error;
128
129
  status = U_ZERO_ERROR;
130
  if (toUnicode) {
131
    ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
132
                        NULL, NULL, NULL, &status);
133
  }
134
  else {
135
    ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
136
                        NULL, NULL, NULL, &status);
137
  }
138
  if (U_FAILURE(status))
139
    goto error;
140
141
  status = U_ZERO_ERROR;
142
  conv->utf8 = ucnv_open("UTF-8", &status);
143
  if (U_SUCCESS(status))
144
    return conv;
145
146
error:
147
  if (conv->uconv)
148
    ucnv_close(conv->uconv);
149
  xmlFree(conv);
150
  return NULL;
151
}
152
153
static void
154
closeIcuConverter(uconv_t *conv)
155
{
156
  if (conv != NULL) {
157
    ucnv_close(conv->uconv);
158
    ucnv_close(conv->utf8);
159
    xmlFree(conv);
160
  }
161
}
162
#endif /* LIBXML_ICU_ENABLED */
163
164
/************************************************************************
165
 *                  *
166
 *    Conversions To/From UTF8 encoding     *
167
 *                  *
168
 ************************************************************************/
169
170
/**
171
 * asciiToUTF8:
172
 * @out:  a pointer to an array of bytes to store the result
173
 * @outlen:  the length of @out
174
 * @in:  a pointer to an array of ASCII chars
175
 * @inlen:  the length of @in
176
 *
177
 * Take a block of ASCII chars in and try to convert it to an UTF-8
178
 * block of chars out.
179
 * Returns 0 if success, or -1 otherwise
180
 * The value of @inlen after return is the number of octets consumed
181
 *     if the return value is positive, else unpredictable.
182
 * The value of @outlen after return is the number of octets produced.
183
 */
184
static int
185
asciiToUTF8(unsigned char* out, int *outlen,
186
826
              const unsigned char* in, int *inlen) {
187
826
    unsigned char* outstart = out;
188
826
    const unsigned char* base = in;
189
826
    const unsigned char* processed = in;
190
826
    unsigned char* outend = out + *outlen;
191
826
    const unsigned char* inend;
192
826
    unsigned int c;
193
194
826
    inend = in + (*inlen);
195
1.90k
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
196
1.89k
  c= *in++;
197
198
1.89k
        if (out >= outend)
199
0
      break;
200
1.89k
        if (c < 0x80) {
201
1.07k
      *out++ = c;
202
1.07k
  } else {
203
815
      *outlen = out - outstart;
204
815
      *inlen = processed - base;
205
815
      return(-1);
206
815
  }
207
208
1.07k
  processed = (const unsigned char*) in;
209
1.07k
    }
210
11
    *outlen = out - outstart;
211
11
    *inlen = processed - base;
212
11
    return(*outlen);
213
826
}
214
215
#ifdef LIBXML_OUTPUT_ENABLED
216
/**
217
 * UTF8Toascii:
218
 * @out:  a pointer to an array of bytes to store the result
219
 * @outlen:  the length of @out
220
 * @in:  a pointer to an array of UTF-8 chars
221
 * @inlen:  the length of @in
222
 *
223
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
224
 * block of chars out.
225
 *
226
 * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
227
 * The value of @inlen after return is the number of octets consumed
228
 *     if the return value is positive, else unpredictable.
229
 * The value of @outlen after return is the number of octets produced.
230
 */
231
static int
232
UTF8Toascii(unsigned char* out, int *outlen,
233
0
              const unsigned char* in, int *inlen) {
234
0
    const unsigned char* processed = in;
235
0
    const unsigned char* outend;
236
0
    const unsigned char* outstart = out;
237
0
    const unsigned char* instart = in;
238
0
    const unsigned char* inend;
239
0
    unsigned int c, d;
240
0
    int trailing;
241
242
0
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
243
0
    if (in == NULL) {
244
        /*
245
   * initialization nothing to do
246
   */
247
0
  *outlen = 0;
248
0
  *inlen = 0;
249
0
  return(0);
250
0
    }
251
0
    inend = in + (*inlen);
252
0
    outend = out + (*outlen);
253
0
    while (in < inend) {
254
0
  d = *in++;
255
0
  if      (d < 0x80)  { c= d; trailing= 0; }
256
0
  else if (d < 0xC0) {
257
      /* trailing byte in leading position */
258
0
      *outlen = out - outstart;
259
0
      *inlen = processed - instart;
260
0
      return(-2);
261
0
        } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
262
0
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
263
0
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
264
0
  else {
265
      /* no chance for this in Ascii */
266
0
      *outlen = out - outstart;
267
0
      *inlen = processed - instart;
268
0
      return(-2);
269
0
  }
270
271
0
  if (inend - in < trailing) {
272
0
      break;
273
0
  }
274
275
0
  for ( ; trailing; trailing--) {
276
0
      if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
277
0
    break;
278
0
      c <<= 6;
279
0
      c |= d & 0x3F;
280
0
  }
281
282
  /* assertion: c is a single UTF-4 value */
283
0
  if (c < 0x80) {
284
0
      if (out >= outend)
285
0
    break;
286
0
      *out++ = c;
287
0
  } else {
288
      /* no chance for this in Ascii */
289
0
      *outlen = out - outstart;
290
0
      *inlen = processed - instart;
291
0
      return(-2);
292
0
  }
293
0
  processed = in;
294
0
    }
295
0
    *outlen = out - outstart;
296
0
    *inlen = processed - instart;
297
0
    return(*outlen);
298
0
}
299
#endif /* LIBXML_OUTPUT_ENABLED */
300
301
/**
302
 * isolat1ToUTF8:
303
 * @out:  a pointer to an array of bytes to store the result
304
 * @outlen:  the length of @out
305
 * @in:  a pointer to an array of ISO Latin 1 chars
306
 * @inlen:  the length of @in
307
 *
308
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
309
 * block of chars out.
310
 * Returns the number of bytes written if success, or -1 otherwise
311
 * The value of @inlen after return is the number of octets consumed
312
 *     if the return value is positive, else unpredictable.
313
 * The value of @outlen after return is the number of octets produced.
314
 */
315
int
316
isolat1ToUTF8(unsigned char* out, int *outlen,
317
302
              const unsigned char* in, int *inlen) {
318
302
    unsigned char* outstart = out;
319
302
    const unsigned char* base = in;
320
302
    unsigned char* outend;
321
302
    const unsigned char* inend;
322
302
    const unsigned char* instop;
323
324
302
    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
325
0
  return(-1);
326
327
302
    outend = out + *outlen;
328
302
    inend = in + (*inlen);
329
302
    instop = inend;
330
331
2.18M
    while ((in < inend) && (out < outend - 1)) {
332
2.18M
  if (*in >= 0x80) {
333
2.18M
      *out++ = (((*in) >>  6) & 0x1F) | 0xC0;
334
2.18M
            *out++ = ((*in) & 0x3F) | 0x80;
335
2.18M
      ++in;
336
2.18M
  }
337
2.18M
  if ((instop - in) > (outend - out)) instop = in + (outend - out);
338
10.7M
  while ((in < instop) && (*in < 0x80)) {
339
8.57M
      *out++ = *in++;
340
8.57M
  }
341
2.18M
    }
342
302
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
343
0
        *out++ = *in++;
344
0
    }
345
302
    *outlen = out - outstart;
346
302
    *inlen = in - base;
347
302
    return(*outlen);
348
302
}
349
350
/**
351
 * UTF8ToUTF8:
352
 * @out:  a pointer to an array of bytes to store the result
353
 * @outlen:  the length of @out
354
 * @inb:  a pointer to an array of UTF-8 chars
355
 * @inlenb:  the length of @in in UTF-8 chars
356
 *
357
 * No op copy operation for UTF8 handling.
358
 *
359
 * Returns the number of bytes written, or -1 if lack of space.
360
 *     The value of *inlen after return is the number of octets consumed
361
 *     if the return value is positive, else unpredictable.
362
 */
363
static int
364
UTF8ToUTF8(unsigned char* out, int *outlen,
365
           const unsigned char* inb, int *inlenb)
366
0
{
367
0
    int len;
368
369
0
    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
370
0
  return(-1);
371
0
    if (inb == NULL) {
372
        /* inb == NULL means output is initialized. */
373
0
        *outlen = 0;
374
0
        *inlenb = 0;
375
0
        return(0);
376
0
    }
377
0
    if (*outlen > *inlenb) {
378
0
  len = *inlenb;
379
0
    } else {
380
0
  len = *outlen;
381
0
    }
382
0
    if (len < 0)
383
0
  return(-1);
384
385
    /*
386
     * FIXME: Conversion functions must assure valid UTF-8, so we have
387
     * to check for UTF-8 validity. Preferably, this converter shouldn't
388
     * be used at all.
389
     */
390
0
    memcpy(out, inb, len);
391
392
0
    *outlen = len;
393
0
    *inlenb = len;
394
0
    return(*outlen);
395
0
}
396
397
398
#ifdef LIBXML_OUTPUT_ENABLED
399
/**
400
 * UTF8Toisolat1:
401
 * @out:  a pointer to an array of bytes to store the result
402
 * @outlen:  the length of @out
403
 * @in:  a pointer to an array of UTF-8 chars
404
 * @inlen:  the length of @in
405
 *
406
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
407
 * block of chars out.
408
 *
409
 * Returns the number of bytes written if success, -2 if the transcoding fails,
410
           or -1 otherwise
411
 * The value of @inlen after return is the number of octets consumed
412
 *     if the return value is positive, else unpredictable.
413
 * The value of @outlen after return is the number of octets produced.
414
 */
415
int
416
UTF8Toisolat1(unsigned char* out, int *outlen,
417
0
              const unsigned char* in, int *inlen) {
418
0
    const unsigned char* processed = in;
419
0
    const unsigned char* outend;
420
0
    const unsigned char* outstart = out;
421
0
    const unsigned char* instart = in;
422
0
    const unsigned char* inend;
423
0
    unsigned int c, d;
424
0
    int trailing;
425
426
0
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
427
0
    if (in == NULL) {
428
        /*
429
   * initialization nothing to do
430
   */
431
0
  *outlen = 0;
432
0
  *inlen = 0;
433
0
  return(0);
434
0
    }
435
0
    inend = in + (*inlen);
436
0
    outend = out + (*outlen);
437
0
    while (in < inend) {
438
0
  d = *in++;
439
0
  if      (d < 0x80)  { c= d; trailing= 0; }
440
0
  else if (d < 0xC0) {
441
      /* trailing byte in leading position */
442
0
      *outlen = out - outstart;
443
0
      *inlen = processed - instart;
444
0
      return(-2);
445
0
        } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
446
0
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
447
0
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
448
0
  else {
449
      /* no chance for this in IsoLat1 */
450
0
      *outlen = out - outstart;
451
0
      *inlen = processed - instart;
452
0
      return(-2);
453
0
  }
454
455
0
  if (inend - in < trailing) {
456
0
      break;
457
0
  }
458
459
0
  for ( ; trailing; trailing--) {
460
0
      if (in >= inend)
461
0
    break;
462
0
      if (((d= *in++) & 0xC0) != 0x80) {
463
0
    *outlen = out - outstart;
464
0
    *inlen = processed - instart;
465
0
    return(-2);
466
0
      }
467
0
      c <<= 6;
468
0
      c |= d & 0x3F;
469
0
  }
470
471
  /* assertion: c is a single UTF-4 value */
472
0
  if (c <= 0xFF) {
473
0
      if (out >= outend)
474
0
    break;
475
0
      *out++ = c;
476
0
  } else {
477
      /* no chance for this in IsoLat1 */
478
0
      *outlen = out - outstart;
479
0
      *inlen = processed - instart;
480
0
      return(-2);
481
0
  }
482
0
  processed = in;
483
0
    }
484
0
    *outlen = out - outstart;
485
0
    *inlen = processed - instart;
486
0
    return(*outlen);
487
0
}
488
#endif /* LIBXML_OUTPUT_ENABLED */
489
490
/**
491
 * UTF16LEToUTF8:
492
 * @out:  a pointer to an array of bytes to store the result
493
 * @outlen:  the length of @out
494
 * @inb:  a pointer to an array of UTF-16LE passwd as a byte array
495
 * @inlenb:  the length of @in in UTF-16LE chars
496
 *
497
 * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8
498
 * block of chars out. This function assumes the endian property
499
 * is the same between the native type of this machine and the
500
 * inputed one.
501
 *
502
 * Returns the number of bytes written, or -1 if lack of space, or -2
503
 *     if the transcoding fails (if *in is not a valid utf16 string)
504
 *     The value of *inlen after return is the number of octets consumed
505
 *     if the return value is positive, else unpredictable.
506
 */
507
static int
508
UTF16LEToUTF8(unsigned char* out, int *outlen,
509
            const unsigned char* inb, int *inlenb)
510
4.08k
{
511
4.08k
    unsigned char* outstart = out;
512
4.08k
    const unsigned char* processed = inb;
513
4.08k
    unsigned char* outend;
514
4.08k
    unsigned short* in = (unsigned short*) inb;
515
4.08k
    unsigned short* inend;
516
4.08k
    unsigned int c, d, inlen;
517
4.08k
    unsigned char *tmp;
518
4.08k
    int bits;
519
520
4.08k
    if (*outlen == 0) {
521
0
        *inlenb = 0;
522
0
        return(0);
523
0
    }
524
4.08k
    outend = out + *outlen;
525
4.08k
    if ((*inlenb % 2) == 1)
526
3.27k
        (*inlenb)--;
527
4.08k
    inlen = *inlenb / 2;
528
4.08k
    inend = in + inlen;
529
1.20M
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
530
1.20M
        if (xmlLittleEndian) {
531
1.20M
      c= *in++;
532
1.20M
  } else {
533
0
      tmp = (unsigned char *) in;
534
0
      c = *tmp++;
535
0
      c = c | (((unsigned int)*tmp) << 8);
536
0
      in++;
537
0
  }
538
1.20M
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
539
687
      if (in >= inend) {           /* handle split mutli-byte characters */
540
441
    break;
541
441
      }
542
246
      if (xmlLittleEndian) {
543
246
    d = *in++;
544
246
      } else {
545
0
    tmp = (unsigned char *) in;
546
0
    d = *tmp++;
547
0
    d = d | (((unsigned int)*tmp) << 8);
548
0
    in++;
549
0
      }
550
246
            if ((d & 0xFC00) == 0xDC00) {
551
217
                c &= 0x03FF;
552
217
                c <<= 10;
553
217
                c |= d & 0x03FF;
554
217
                c += 0x10000;
555
217
            }
556
29
            else {
557
29
    *outlen = out - outstart;
558
29
    *inlenb = processed - inb;
559
29
          return(-2);
560
29
      }
561
246
        }
562
563
  /* assertion: c is a single UTF-4 value */
564
1.20M
        if (out >= outend)
565
0
      break;
566
1.20M
        if      (c <    0x80) {  *out++=  c;                bits= -6; }
567
1.18M
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
568
1.18M
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
569
217
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
570
571
3.57M
        for ( ; bits >= 0; bits-= 6) {
572
2.37M
            if (out >= outend)
573
0
          break;
574
2.37M
            *out++= ((c >> bits) & 0x3F) | 0x80;
575
2.37M
        }
576
1.20M
  processed = (const unsigned char*) in;
577
1.20M
    }
578
4.05k
    *outlen = out - outstart;
579
4.05k
    *inlenb = processed - inb;
580
4.05k
    return(*outlen);
581
4.08k
}
582
583
#ifdef LIBXML_OUTPUT_ENABLED
584
/**
585
 * UTF8ToUTF16LE:
586
 * @outb:  a pointer to an array of bytes to store the result
587
 * @outlen:  the length of @outb
588
 * @in:  a pointer to an array of UTF-8 chars
589
 * @inlen:  the length of @in
590
 *
591
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
592
 * block of chars out.
593
 *
594
 * Returns the number of bytes written, or -1 if lack of space, or -2
595
 *     if the transcoding failed.
596
 */
597
static int
598
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
599
            const unsigned char* in, int *inlen)
600
0
{
601
0
    unsigned short* out = (unsigned short*) outb;
602
0
    const unsigned char* processed = in;
603
0
    const unsigned char *const instart = in;
604
0
    unsigned short* outstart= out;
605
0
    unsigned short* outend;
606
0
    const unsigned char* inend;
607
0
    unsigned int c, d;
608
0
    int trailing;
609
0
    unsigned char *tmp;
610
0
    unsigned short tmp1, tmp2;
611
612
    /* UTF16LE encoding has no BOM */
613
0
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
614
0
    if (in == NULL) {
615
0
  *outlen = 0;
616
0
  *inlen = 0;
617
0
  return(0);
618
0
    }
619
0
    inend= in + *inlen;
620
0
    outend = out + (*outlen / 2);
621
0
    while (in < inend) {
622
0
      d= *in++;
623
0
      if      (d < 0x80)  { c= d; trailing= 0; }
624
0
      else if (d < 0xC0) {
625
          /* trailing byte in leading position */
626
0
    *outlen = (out - outstart) * 2;
627
0
    *inlen = processed - instart;
628
0
    return(-2);
629
0
      } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
630
0
      else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
631
0
      else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
632
0
      else {
633
  /* no chance for this in UTF-16 */
634
0
  *outlen = (out - outstart) * 2;
635
0
  *inlen = processed - instart;
636
0
  return(-2);
637
0
      }
638
639
0
      if (inend - in < trailing) {
640
0
          break;
641
0
      }
642
643
0
      for ( ; trailing; trailing--) {
644
0
          if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
645
0
        break;
646
0
          c <<= 6;
647
0
          c |= d & 0x3F;
648
0
      }
649
650
      /* assertion: c is a single UTF-4 value */
651
0
        if (c < 0x10000) {
652
0
            if (out >= outend)
653
0
          break;
654
0
      if (xmlLittleEndian) {
655
0
    *out++ = c;
656
0
      } else {
657
0
    tmp = (unsigned char *) out;
658
0
    *tmp = c ;
659
0
    *(tmp + 1) = c >> 8 ;
660
0
    out++;
661
0
      }
662
0
        }
663
0
        else if (c < 0x110000) {
664
0
            if (out+1 >= outend)
665
0
          break;
666
0
            c -= 0x10000;
667
0
      if (xmlLittleEndian) {
668
0
    *out++ = 0xD800 | (c >> 10);
669
0
    *out++ = 0xDC00 | (c & 0x03FF);
670
0
      } else {
671
0
    tmp1 = 0xD800 | (c >> 10);
672
0
    tmp = (unsigned char *) out;
673
0
    *tmp = (unsigned char) tmp1;
674
0
    *(tmp + 1) = tmp1 >> 8;
675
0
    out++;
676
677
0
    tmp2 = 0xDC00 | (c & 0x03FF);
678
0
    tmp = (unsigned char *) out;
679
0
    *tmp  = (unsigned char) tmp2;
680
0
    *(tmp + 1) = tmp2 >> 8;
681
0
    out++;
682
0
      }
683
0
        }
684
0
        else
685
0
      break;
686
0
  processed = in;
687
0
    }
688
0
    *outlen = (out - outstart) * 2;
689
0
    *inlen = processed - instart;
690
0
    return(*outlen);
691
0
}
692
693
/**
694
 * UTF8ToUTF16:
695
 * @outb:  a pointer to an array of bytes to store the result
696
 * @outlen:  the length of @outb
697
 * @in:  a pointer to an array of UTF-8 chars
698
 * @inlen:  the length of @in
699
 *
700
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
701
 * block of chars out.
702
 *
703
 * Returns the number of bytes written, or -1 if lack of space, or -2
704
 *     if the transcoding failed.
705
 */
706
static int
707
UTF8ToUTF16(unsigned char* outb, int *outlen,
708
            const unsigned char* in, int *inlen)
709
0
{
710
0
    if (in == NULL) {
711
  /*
712
   * initialization, add the Byte Order Mark for UTF-16LE
713
   */
714
0
        if (*outlen >= 2) {
715
0
      outb[0] = 0xFF;
716
0
      outb[1] = 0xFE;
717
0
      *outlen = 2;
718
0
      *inlen = 0;
719
#ifdef DEBUG_ENCODING
720
            xmlGenericError(xmlGenericErrorContext,
721
        "Added FFFE Byte Order Mark\n");
722
#endif
723
0
      return(2);
724
0
  }
725
0
  *outlen = 0;
726
0
  *inlen = 0;
727
0
  return(0);
728
0
    }
729
0
    return (UTF8ToUTF16LE(outb, outlen, in, inlen));
730
0
}
731
#endif /* LIBXML_OUTPUT_ENABLED */
732
733
/**
734
 * UTF16BEToUTF8:
735
 * @out:  a pointer to an array of bytes to store the result
736
 * @outlen:  the length of @out
737
 * @inb:  a pointer to an array of UTF-16 passed as a byte array
738
 * @inlenb:  the length of @in in UTF-16 chars
739
 *
740
 * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
741
 * block of chars out. This function assumes the endian property
742
 * is the same between the native type of this machine and the
743
 * inputed one.
744
 *
745
 * Returns the number of bytes written, or -1 if lack of space, or -2
746
 *     if the transcoding fails (if *in is not a valid utf16 string)
747
 * The value of *inlen after return is the number of octets consumed
748
 *     if the return value is positive, else unpredictable.
749
 */
750
static int
751
UTF16BEToUTF8(unsigned char* out, int *outlen,
752
            const unsigned char* inb, int *inlenb)
753
1.24k
{
754
1.24k
    unsigned char* outstart = out;
755
1.24k
    const unsigned char* processed = inb;
756
1.24k
    unsigned char* outend;
757
1.24k
    unsigned short* in = (unsigned short*) inb;
758
1.24k
    unsigned short* inend;
759
1.24k
    unsigned int c, d, inlen;
760
1.24k
    unsigned char *tmp;
761
1.24k
    int bits;
762
763
1.24k
    if (*outlen == 0) {
764
0
        *inlenb = 0;
765
0
        return(0);
766
0
    }
767
1.24k
    outend = out + *outlen;
768
1.24k
    if ((*inlenb % 2) == 1)
769
698
        (*inlenb)--;
770
1.24k
    inlen = *inlenb / 2;
771
1.24k
    inend= in + inlen;
772
193k
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
773
192k
  if (xmlLittleEndian) {
774
192k
      tmp = (unsigned char *) in;
775
192k
      c = *tmp++;
776
192k
      c = (c << 8) | (unsigned int) *tmp;
777
192k
      in++;
778
192k
  } else {
779
0
      c= *in++;
780
0
  }
781
192k
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
782
562
      if (in >= inend) {           /* handle split mutli-byte characters */
783
327
                break;
784
327
      }
785
235
      if (xmlLittleEndian) {
786
235
    tmp = (unsigned char *) in;
787
235
    d = *tmp++;
788
235
    d = (d << 8) | (unsigned int) *tmp;
789
235
    in++;
790
235
      } else {
791
0
    d= *in++;
792
0
      }
793
235
            if ((d & 0xFC00) == 0xDC00) {
794
208
                c &= 0x03FF;
795
208
                c <<= 10;
796
208
                c |= d & 0x03FF;
797
208
                c += 0x10000;
798
208
            }
799
27
            else {
800
27
    *outlen = out - outstart;
801
27
    *inlenb = processed - inb;
802
27
          return(-2);
803
27
      }
804
235
        }
805
806
  /* assertion: c is a single UTF-4 value */
807
192k
        if (out >= outend)
808
0
      break;
809
192k
        if      (c <    0x80) {  *out++=  c;                bits= -6; }
810
191k
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
811
190k
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
812
208
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
813
814
574k
        for ( ; bits >= 0; bits-= 6) {
815
382k
            if (out >= outend)
816
0
          break;
817
382k
            *out++= ((c >> bits) & 0x3F) | 0x80;
818
382k
        }
819
192k
  processed = (const unsigned char*) in;
820
192k
    }
821
1.21k
    *outlen = out - outstart;
822
1.21k
    *inlenb = processed - inb;
823
1.21k
    return(*outlen);
824
1.24k
}
825
826
#ifdef LIBXML_OUTPUT_ENABLED
827
/**
828
 * UTF8ToUTF16BE:
829
 * @outb:  a pointer to an array of bytes to store the result
830
 * @outlen:  the length of @outb
831
 * @in:  a pointer to an array of UTF-8 chars
832
 * @inlen:  the length of @in
833
 *
834
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
835
 * block of chars out.
836
 *
837
 * Returns the number of byte written, or -1 by lack of space, or -2
838
 *     if the transcoding failed.
839
 */
840
static int
841
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
842
            const unsigned char* in, int *inlen)
843
0
{
844
0
    unsigned short* out = (unsigned short*) outb;
845
0
    const unsigned char* processed = in;
846
0
    const unsigned char *const instart = in;
847
0
    unsigned short* outstart= out;
848
0
    unsigned short* outend;
849
0
    const unsigned char* inend;
850
0
    unsigned int c, d;
851
0
    int trailing;
852
0
    unsigned char *tmp;
853
0
    unsigned short tmp1, tmp2;
854
855
    /* UTF-16BE has no BOM */
856
0
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
857
0
    if (in == NULL) {
858
0
  *outlen = 0;
859
0
  *inlen = 0;
860
0
  return(0);
861
0
    }
862
0
    inend= in + *inlen;
863
0
    outend = out + (*outlen / 2);
864
0
    while (in < inend) {
865
0
      d= *in++;
866
0
      if      (d < 0x80)  { c= d; trailing= 0; }
867
0
      else if (d < 0xC0)  {
868
          /* trailing byte in leading position */
869
0
    *outlen = out - outstart;
870
0
    *inlen = processed - instart;
871
0
    return(-2);
872
0
      } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
873
0
      else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
874
0
      else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
875
0
      else {
876
          /* no chance for this in UTF-16 */
877
0
    *outlen = out - outstart;
878
0
    *inlen = processed - instart;
879
0
    return(-2);
880
0
      }
881
882
0
      if (inend - in < trailing) {
883
0
          break;
884
0
      }
885
886
0
      for ( ; trailing; trailing--) {
887
0
          if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))  break;
888
0
          c <<= 6;
889
0
          c |= d & 0x3F;
890
0
      }
891
892
      /* assertion: c is a single UTF-4 value */
893
0
        if (c < 0x10000) {
894
0
            if (out >= outend)  break;
895
0
      if (xmlLittleEndian) {
896
0
    tmp = (unsigned char *) out;
897
0
    *tmp = c >> 8;
898
0
    *(tmp + 1) = c;
899
0
    out++;
900
0
      } else {
901
0
    *out++ = c;
902
0
      }
903
0
        }
904
0
        else if (c < 0x110000) {
905
0
            if (out+1 >= outend)  break;
906
0
            c -= 0x10000;
907
0
      if (xmlLittleEndian) {
908
0
    tmp1 = 0xD800 | (c >> 10);
909
0
    tmp = (unsigned char *) out;
910
0
    *tmp = tmp1 >> 8;
911
0
    *(tmp + 1) = (unsigned char) tmp1;
912
0
    out++;
913
914
0
    tmp2 = 0xDC00 | (c & 0x03FF);
915
0
    tmp = (unsigned char *) out;
916
0
    *tmp = tmp2 >> 8;
917
0
    *(tmp + 1) = (unsigned char) tmp2;
918
0
    out++;
919
0
      } else {
920
0
    *out++ = 0xD800 | (c >> 10);
921
0
    *out++ = 0xDC00 | (c & 0x03FF);
922
0
      }
923
0
        }
924
0
        else
925
0
      break;
926
0
  processed = in;
927
0
    }
928
0
    *outlen = (out - outstart) * 2;
929
0
    *inlen = processed - instart;
930
0
    return(*outlen);
931
0
}
932
#endif /* LIBXML_OUTPUT_ENABLED */
933
934
/************************************************************************
935
 *                  *
936
 *    Generic encoding handling routines      *
937
 *                  *
938
 ************************************************************************/
939
940
/**
941
 * xmlDetectCharEncoding:
942
 * @in:  a pointer to the first bytes of the XML entity, must be at least
943
 *       2 bytes long (at least 4 if encoding is UTF4 variant).
944
 * @len:  pointer to the length of the buffer
945
 *
946
 * Guess the encoding of the entity using the first bytes of the entity content
947
 * according to the non-normative appendix F of the XML-1.0 recommendation.
948
 *
949
 * Returns one of the XML_CHAR_ENCODING_... values.
950
 */
951
xmlCharEncoding
952
xmlDetectCharEncoding(const unsigned char* in, int len)
953
18.6k
{
954
18.6k
    if (in == NULL)
955
0
        return(XML_CHAR_ENCODING_NONE);
956
18.6k
    if (len >= 4) {
957
18.6k
  if ((in[0] == 0x00) && (in[1] == 0x00) &&
958
18.6k
      (in[2] == 0x00) && (in[3] == 0x3C))
959
4
      return(XML_CHAR_ENCODING_UCS4BE);
960
18.6k
  if ((in[0] == 0x3C) && (in[1] == 0x00) &&
961
18.6k
      (in[2] == 0x00) && (in[3] == 0x00))
962
17
      return(XML_CHAR_ENCODING_UCS4LE);
963
18.6k
  if ((in[0] == 0x00) && (in[1] == 0x00) &&
964
18.6k
      (in[2] == 0x3C) && (in[3] == 0x00))
965
1
      return(XML_CHAR_ENCODING_UCS4_2143);
966
18.6k
  if ((in[0] == 0x00) && (in[1] == 0x3C) &&
967
18.6k
      (in[2] == 0x00) && (in[3] == 0x00))
968
1
      return(XML_CHAR_ENCODING_UCS4_3412);
969
18.6k
  if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
970
18.6k
      (in[2] == 0xA7) && (in[3] == 0x94))
971
22
      return(XML_CHAR_ENCODING_EBCDIC);
972
18.6k
  if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
973
18.6k
      (in[2] == 0x78) && (in[3] == 0x6D))
974
4.71k
      return(XML_CHAR_ENCODING_UTF8);
975
  /*
976
   * Although not part of the recommendation, we also
977
   * attempt an "auto-recognition" of UTF-16LE and
978
   * UTF-16BE encodings.
979
   */
980
13.9k
  if ((in[0] == 0x3C) && (in[1] == 0x00) &&
981
13.9k
      (in[2] == 0x3F) && (in[3] == 0x00))
982
167
      return(XML_CHAR_ENCODING_UTF16LE);
983
13.7k
  if ((in[0] == 0x00) && (in[1] == 0x3C) &&
984
13.7k
      (in[2] == 0x00) && (in[3] == 0x3F))
985
78
      return(XML_CHAR_ENCODING_UTF16BE);
986
13.7k
    }
987
13.6k
    if (len >= 3) {
988
  /*
989
   * Errata on XML-1.0 June 20 2001
990
   * We now allow an UTF8 encoded BOM
991
   */
992
13.6k
  if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
993
13.6k
      (in[2] == 0xBF))
994
1
      return(XML_CHAR_ENCODING_UTF8);
995
13.6k
    }
996
    /* For UTF-16 we can recognize by the BOM */
997
13.6k
    if (len >= 2) {
998
13.6k
  if ((in[0] == 0xFE) && (in[1] == 0xFF))
999
157
      return(XML_CHAR_ENCODING_UTF16BE);
1000
13.5k
  if ((in[0] == 0xFF) && (in[1] == 0xFE))
1001
225
      return(XML_CHAR_ENCODING_UTF16LE);
1002
13.5k
    }
1003
13.3k
    return(XML_CHAR_ENCODING_NONE);
1004
13.6k
}
1005
1006
/**
1007
 * xmlCleanupEncodingAliases:
1008
 *
1009
 * Unregisters all aliases
1010
 */
1011
void
1012
0
xmlCleanupEncodingAliases(void) {
1013
0
    int i;
1014
1015
0
    if (xmlCharEncodingAliases == NULL)
1016
0
  return;
1017
1018
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1019
0
  if (xmlCharEncodingAliases[i].name != NULL)
1020
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1021
0
  if (xmlCharEncodingAliases[i].alias != NULL)
1022
0
      xmlFree((char *) xmlCharEncodingAliases[i].alias);
1023
0
    }
1024
0
    xmlCharEncodingAliasesNb = 0;
1025
0
    xmlCharEncodingAliasesMax = 0;
1026
0
    xmlFree(xmlCharEncodingAliases);
1027
0
    xmlCharEncodingAliases = NULL;
1028
0
}
1029
1030
/**
1031
 * xmlGetEncodingAlias:
1032
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1033
 *
1034
 * Lookup an encoding name for the given alias.
1035
 *
1036
 * Returns NULL if not found, otherwise the original name
1037
 */
1038
const char *
1039
4.66k
xmlGetEncodingAlias(const char *alias) {
1040
4.66k
    int i;
1041
4.66k
    char upper[100];
1042
1043
4.66k
    if (alias == NULL)
1044
0
  return(NULL);
1045
1046
4.66k
    if (xmlCharEncodingAliases == NULL)
1047
4.66k
  return(NULL);
1048
1049
0
    for (i = 0;i < 99;i++) {
1050
0
        upper[i] = toupper(alias[i]);
1051
0
  if (upper[i] == 0) break;
1052
0
    }
1053
0
    upper[i] = 0;
1054
1055
    /*
1056
     * Walk down the list looking for a definition of the alias
1057
     */
1058
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1059
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1060
0
      return(xmlCharEncodingAliases[i].name);
1061
0
  }
1062
0
    }
1063
0
    return(NULL);
1064
0
}
1065
1066
/**
1067
 * xmlAddEncodingAlias:
1068
 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1069
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1070
 *
1071
 * Registers an alias @alias for an encoding named @name. Existing alias
1072
 * will be overwritten.
1073
 *
1074
 * Returns 0 in case of success, -1 in case of error
1075
 */
1076
int
1077
0
xmlAddEncodingAlias(const char *name, const char *alias) {
1078
0
    int i;
1079
0
    char upper[100];
1080
1081
0
    if ((name == NULL) || (alias == NULL))
1082
0
  return(-1);
1083
1084
0
    for (i = 0;i < 99;i++) {
1085
0
        upper[i] = toupper(alias[i]);
1086
0
  if (upper[i] == 0) break;
1087
0
    }
1088
0
    upper[i] = 0;
1089
1090
0
    if (xmlCharEncodingAliases == NULL) {
1091
0
  xmlCharEncodingAliasesNb = 0;
1092
0
  xmlCharEncodingAliasesMax = 20;
1093
0
  xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1094
0
        xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1095
0
  if (xmlCharEncodingAliases == NULL)
1096
0
      return(-1);
1097
0
    } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1098
0
  xmlCharEncodingAliasesMax *= 2;
1099
0
  xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1100
0
        xmlRealloc(xmlCharEncodingAliases,
1101
0
             xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1102
0
    }
1103
    /*
1104
     * Walk down the list looking for a definition of the alias
1105
     */
1106
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1107
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1108
      /*
1109
       * Replace the definition.
1110
       */
1111
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1112
0
      xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1113
0
      return(0);
1114
0
  }
1115
0
    }
1116
    /*
1117
     * Add the definition
1118
     */
1119
0
    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1120
0
    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1121
0
    xmlCharEncodingAliasesNb++;
1122
0
    return(0);
1123
0
}
1124
1125
/**
1126
 * xmlDelEncodingAlias:
1127
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1128
 *
1129
 * Unregisters an encoding alias @alias
1130
 *
1131
 * Returns 0 in case of success, -1 in case of error
1132
 */
1133
int
1134
0
xmlDelEncodingAlias(const char *alias) {
1135
0
    int i;
1136
1137
0
    if (alias == NULL)
1138
0
  return(-1);
1139
1140
0
    if (xmlCharEncodingAliases == NULL)
1141
0
  return(-1);
1142
    /*
1143
     * Walk down the list looking for a definition of the alias
1144
     */
1145
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1146
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1147
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1148
0
      xmlFree((char *) xmlCharEncodingAliases[i].alias);
1149
0
      xmlCharEncodingAliasesNb--;
1150
0
      memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1151
0
        sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1152
0
      return(0);
1153
0
  }
1154
0
    }
1155
0
    return(-1);
1156
0
}
1157
1158
/**
1159
 * xmlParseCharEncoding:
1160
 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1161
 *
1162
 * Compare the string to the encoding schemes already known. Note
1163
 * that the comparison is case insensitive accordingly to the section
1164
 * [XML] 4.3.3 Character Encoding in Entities.
1165
 *
1166
 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1167
 * if not recognized.
1168
 */
1169
xmlCharEncoding
1170
xmlParseCharEncoding(const char* name)
1171
482
{
1172
482
    const char *alias;
1173
482
    char upper[500];
1174
482
    int i;
1175
1176
482
    if (name == NULL)
1177
0
  return(XML_CHAR_ENCODING_NONE);
1178
1179
    /*
1180
     * Do the alias resolution
1181
     */
1182
482
    alias = xmlGetEncodingAlias(name);
1183
482
    if (alias != NULL)
1184
0
  name = alias;
1185
1186
5.71k
    for (i = 0;i < 499;i++) {
1187
5.70k
        upper[i] = toupper(name[i]);
1188
5.70k
  if (upper[i] == 0) break;
1189
5.70k
    }
1190
482
    upper[i] = 0;
1191
1192
482
    if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1193
482
    if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1194
482
    if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1195
1196
    /*
1197
     * NOTE: if we were able to parse this, the endianness of UTF16 is
1198
     *       already found and in use
1199
     */
1200
482
    if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1201
482
    if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1202
1203
482
    if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1204
479
    if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1205
479
    if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1206
1207
    /*
1208
     * NOTE: if we were able to parse this, the endianness of UCS4 is
1209
     *       already found and in use
1210
     */
1211
479
    if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1212
456
    if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1213
456
    if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1214
1215
1216
456
    if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1217
456
    if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1218
454
    if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1219
1220
454
    if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1221
454
    if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1222
453
    if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1223
1224
453
    if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1225
453
    if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1226
453
    if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1227
453
    if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1228
453
    if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1229
453
    if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1230
453
    if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1231
1232
453
    if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1233
453
    if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1234
453
    if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1235
1236
#ifdef DEBUG_ENCODING
1237
    xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1238
#endif
1239
453
    return(XML_CHAR_ENCODING_ERROR);
1240
453
}
1241
1242
/**
1243
 * xmlGetCharEncodingName:
1244
 * @enc:  the encoding
1245
 *
1246
 * The "canonical" name for XML encoding.
1247
 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1248
 * Section 4.3.3  Character Encoding in Entities
1249
 *
1250
 * Returns the canonical name for the given encoding
1251
 */
1252
1253
const char*
1254
31
xmlGetCharEncodingName(xmlCharEncoding enc) {
1255
31
    switch (enc) {
1256
0
        case XML_CHAR_ENCODING_ERROR:
1257
0
      return(NULL);
1258
0
        case XML_CHAR_ENCODING_NONE:
1259
0
      return(NULL);
1260
0
        case XML_CHAR_ENCODING_UTF8:
1261
0
      return("UTF-8");
1262
0
        case XML_CHAR_ENCODING_UTF16LE:
1263
0
      return("UTF-16");
1264
0
        case XML_CHAR_ENCODING_UTF16BE:
1265
0
      return("UTF-16");
1266
0
        case XML_CHAR_ENCODING_EBCDIC:
1267
0
            return("EBCDIC");
1268
23
        case XML_CHAR_ENCODING_UCS4LE:
1269
23
            return("ISO-10646-UCS-4");
1270
0
        case XML_CHAR_ENCODING_UCS4BE:
1271
0
            return("ISO-10646-UCS-4");
1272
1
        case XML_CHAR_ENCODING_UCS4_2143:
1273
1
            return("ISO-10646-UCS-4");
1274
1
        case XML_CHAR_ENCODING_UCS4_3412:
1275
1
            return("ISO-10646-UCS-4");
1276
3
        case XML_CHAR_ENCODING_UCS2:
1277
3
            return("ISO-10646-UCS-2");
1278
2
        case XML_CHAR_ENCODING_8859_1:
1279
2
      return("ISO-8859-1");
1280
1
        case XML_CHAR_ENCODING_8859_2:
1281
1
      return("ISO-8859-2");
1282
0
        case XML_CHAR_ENCODING_8859_3:
1283
0
      return("ISO-8859-3");
1284
0
        case XML_CHAR_ENCODING_8859_4:
1285
0
      return("ISO-8859-4");
1286
0
        case XML_CHAR_ENCODING_8859_5:
1287
0
      return("ISO-8859-5");
1288
0
        case XML_CHAR_ENCODING_8859_6:
1289
0
      return("ISO-8859-6");
1290
0
        case XML_CHAR_ENCODING_8859_7:
1291
0
      return("ISO-8859-7");
1292
0
        case XML_CHAR_ENCODING_8859_8:
1293
0
      return("ISO-8859-8");
1294
0
        case XML_CHAR_ENCODING_8859_9:
1295
0
      return("ISO-8859-9");
1296
0
        case XML_CHAR_ENCODING_2022_JP:
1297
0
            return("ISO-2022-JP");
1298
0
        case XML_CHAR_ENCODING_SHIFT_JIS:
1299
0
            return("Shift-JIS");
1300
0
        case XML_CHAR_ENCODING_EUC_JP:
1301
0
            return("EUC-JP");
1302
0
  case XML_CHAR_ENCODING_ASCII:
1303
0
      return(NULL);
1304
31
    }
1305
0
    return(NULL);
1306
31
}
1307
1308
/************************************************************************
1309
 *                  *
1310
 *      Char encoding handlers        *
1311
 *                  *
1312
 ************************************************************************/
1313
1314
1315
/* the size should be growable, but it's not a big deal ... */
1316
9
#define MAX_ENCODING_HANDLERS 50
1317
static xmlCharEncodingHandlerPtr *handlers = NULL;
1318
static int nbCharEncodingHandler = 0;
1319
1320
/*
1321
 * The default is UTF-8 for XML, that's also the default used for the
1322
 * parser internals, so the default encoding handler is NULL
1323
 */
1324
1325
static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1326
1327
/**
1328
 * xmlNewCharEncodingHandler:
1329
 * @name:  the encoding name, in UTF-8 format (ASCII actually)
1330
 * @input:  the xmlCharEncodingInputFunc to read that encoding
1331
 * @output:  the xmlCharEncodingOutputFunc to write that encoding
1332
 *
1333
 * Create and registers an xmlCharEncodingHandler.
1334
 *
1335
 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1336
 */
1337
xmlCharEncodingHandlerPtr
1338
xmlNewCharEncodingHandler(const char *name,
1339
                          xmlCharEncodingInputFunc input,
1340
8
                          xmlCharEncodingOutputFunc output) {
1341
8
    xmlCharEncodingHandlerPtr handler;
1342
8
    const char *alias;
1343
8
    char upper[500];
1344
8
    int i;
1345
8
    char *up = NULL;
1346
1347
    /*
1348
     * Do the alias resolution
1349
     */
1350
8
    alias = xmlGetEncodingAlias(name);
1351
8
    if (alias != NULL)
1352
0
  name = alias;
1353
1354
    /*
1355
     * Keep only the uppercase version of the encoding.
1356
     */
1357
8
    if (name == NULL) {
1358
0
        xmlEncodingErr(XML_I18N_NO_NAME,
1359
0
           "xmlNewCharEncodingHandler : no name !\n", NULL);
1360
0
  return(NULL);
1361
0
    }
1362
62
    for (i = 0;i < 499;i++) {
1363
62
        upper[i] = toupper(name[i]);
1364
62
  if (upper[i] == 0) break;
1365
62
    }
1366
8
    upper[i] = 0;
1367
8
    up = xmlMemStrdup(upper);
1368
8
    if (up == NULL) {
1369
0
        xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1370
0
  return(NULL);
1371
0
    }
1372
1373
    /*
1374
     * allocate and fill-up an handler block.
1375
     */
1376
8
    handler = (xmlCharEncodingHandlerPtr)
1377
8
              xmlMalloc(sizeof(xmlCharEncodingHandler));
1378
8
    if (handler == NULL) {
1379
0
        xmlFree(up);
1380
0
        xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1381
0
  return(NULL);
1382
0
    }
1383
8
    memset(handler, 0, sizeof(xmlCharEncodingHandler));
1384
8
    handler->input = input;
1385
8
    handler->output = output;
1386
8
    handler->name = up;
1387
1388
8
#ifdef LIBXML_ICONV_ENABLED
1389
8
    handler->iconv_in = NULL;
1390
8
    handler->iconv_out = NULL;
1391
8
#endif
1392
#ifdef LIBXML_ICU_ENABLED
1393
    handler->uconv_in = NULL;
1394
    handler->uconv_out = NULL;
1395
#endif
1396
1397
    /*
1398
     * registers and returns the handler.
1399
     */
1400
8
    xmlRegisterCharEncodingHandler(handler);
1401
#ifdef DEBUG_ENCODING
1402
    xmlGenericError(xmlGenericErrorContext,
1403
      "Registered encoding handler for %s\n", name);
1404
#endif
1405
8
    return(handler);
1406
8
}
1407
1408
/**
1409
 * xmlInitCharEncodingHandlers:
1410
 *
1411
 * DEPRECATED: This function will be made private. Call xmlInitParser to
1412
 * initialize the library.
1413
 *
1414
 * Initialize the char encoding support, it registers the default
1415
 * encoding supported.
1416
 * NOTE: while public, this function usually doesn't need to be called
1417
 *       in normal processing.
1418
 */
1419
void
1420
1
xmlInitCharEncodingHandlers(void) {
1421
1
    unsigned short int tst = 0x1234;
1422
1
    unsigned char *ptr = (unsigned char *) &tst;
1423
1424
1
    if (handlers != NULL) return;
1425
1426
1
    handlers = (xmlCharEncodingHandlerPtr *)
1427
1
        xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1428
1429
1
    if (*ptr == 0x12) xmlLittleEndian = 0;
1430
1
    else if (*ptr == 0x34) xmlLittleEndian = 1;
1431
0
    else {
1432
0
        xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1433
0
                 "Odd problem at endianness detection\n", NULL);
1434
0
    }
1435
1436
1
    if (handlers == NULL) {
1437
0
        xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
1438
0
  return;
1439
0
    }
1440
1
    xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
1441
1
#ifdef LIBXML_OUTPUT_ENABLED
1442
1
    xmlUTF16LEHandler =
1443
1
          xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1444
1
    xmlUTF16BEHandler =
1445
1
          xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
1446
1
    xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
1447
1
    xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1448
1
    xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
1449
1
    xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
1450
1
#ifdef LIBXML_HTML_ENABLED
1451
1
    xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1452
1
#endif
1453
#else
1454
    xmlUTF16LEHandler =
1455
          xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1456
    xmlUTF16BEHandler =
1457
          xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
1458
    xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
1459
    xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1460
    xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1461
    xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1462
#endif /* LIBXML_OUTPUT_ENABLED */
1463
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
1464
#ifdef LIBXML_ISO8859X_ENABLED
1465
    xmlRegisterCharEncodingHandlersISO8859x ();
1466
#endif
1467
#endif
1468
1469
1
}
1470
1471
/**
1472
 * xmlCleanupCharEncodingHandlers:
1473
 *
1474
 * DEPRECATED: This function will be made private. Call xmlCleanupParser
1475
 * to free global state but see the warnings there. xmlCleanupParser
1476
 * should be only called once at program exit. In most cases, you don't
1477
 * have call cleanup functions at all.
1478
 *
1479
 * Cleanup the memory allocated for the char encoding support, it
1480
 * unregisters all the encoding handlers and the aliases.
1481
 */
1482
void
1483
0
xmlCleanupCharEncodingHandlers(void) {
1484
0
    xmlCleanupEncodingAliases();
1485
1486
0
    if (handlers == NULL) return;
1487
1488
0
    for (;nbCharEncodingHandler > 0;) {
1489
0
        nbCharEncodingHandler--;
1490
0
  if (handlers[nbCharEncodingHandler] != NULL) {
1491
0
      if (handlers[nbCharEncodingHandler]->name != NULL)
1492
0
    xmlFree(handlers[nbCharEncodingHandler]->name);
1493
0
      xmlFree(handlers[nbCharEncodingHandler]);
1494
0
  }
1495
0
    }
1496
0
    xmlFree(handlers);
1497
0
    handlers = NULL;
1498
0
    nbCharEncodingHandler = 0;
1499
0
    xmlDefaultCharEncodingHandler = NULL;
1500
0
}
1501
1502
/**
1503
 * xmlRegisterCharEncodingHandler:
1504
 * @handler:  the xmlCharEncodingHandlerPtr handler block
1505
 *
1506
 * Register the char encoding handler, surprising, isn't it ?
1507
 */
1508
void
1509
8
xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1510
8
    if (handlers == NULL) xmlInitCharEncodingHandlers();
1511
8
    if ((handler == NULL) || (handlers == NULL)) {
1512
0
        xmlEncodingErr(XML_I18N_NO_HANDLER,
1513
0
    "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
1514
0
        goto free_handler;
1515
0
    }
1516
1517
8
    if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1518
0
        xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1519
0
  "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1520
0
                 "MAX_ENCODING_HANDLERS");
1521
0
        goto free_handler;
1522
0
    }
1523
8
    handlers[nbCharEncodingHandler++] = handler;
1524
8
    return;
1525
1526
0
free_handler:
1527
0
    if (handler != NULL) {
1528
0
        if (handler->name != NULL) {
1529
0
            xmlFree(handler->name);
1530
0
        }
1531
0
        xmlFree(handler);
1532
0
    }
1533
0
}
1534
1535
/**
1536
 * xmlGetCharEncodingHandler:
1537
 * @enc:  an xmlCharEncoding value.
1538
 *
1539
 * Search in the registered set the handler able to read/write that encoding.
1540
 *
1541
 * Returns the handler or NULL if not found
1542
 */
1543
xmlCharEncodingHandlerPtr
1544
320k
xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1545
320k
    xmlCharEncodingHandlerPtr handler;
1546
1547
320k
    if (handlers == NULL) xmlInitCharEncodingHandlers();
1548
320k
    switch (enc) {
1549
0
        case XML_CHAR_ENCODING_ERROR:
1550
0
      return(NULL);
1551
319k
        case XML_CHAR_ENCODING_NONE:
1552
319k
      return(NULL);
1553
0
        case XML_CHAR_ENCODING_UTF8:
1554
0
      return(NULL);
1555
392
        case XML_CHAR_ENCODING_UTF16LE:
1556
392
      return(xmlUTF16LEHandler);
1557
235
        case XML_CHAR_ENCODING_UTF16BE:
1558
235
      return(xmlUTF16BEHandler);
1559
22
        case XML_CHAR_ENCODING_EBCDIC:
1560
22
            handler = xmlFindCharEncodingHandler("EBCDIC");
1561
22
            if (handler != NULL) return(handler);
1562
22
            handler = xmlFindCharEncodingHandler("ebcdic");
1563
22
            if (handler != NULL) return(handler);
1564
22
            handler = xmlFindCharEncodingHandler("EBCDIC-US");
1565
22
            if (handler != NULL) return(handler);
1566
0
            handler = xmlFindCharEncodingHandler("IBM-037");
1567
0
            if (handler != NULL) return(handler);
1568
0
      break;
1569
4
        case XML_CHAR_ENCODING_UCS4BE:
1570
4
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1571
4
            if (handler != NULL) return(handler);
1572
4
            handler = xmlFindCharEncodingHandler("UCS-4");
1573
4
            if (handler != NULL) return(handler);
1574
0
            handler = xmlFindCharEncodingHandler("UCS4");
1575
0
            if (handler != NULL) return(handler);
1576
0
      break;
1577
17
        case XML_CHAR_ENCODING_UCS4LE:
1578
17
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1579
17
            if (handler != NULL) return(handler);
1580
17
            handler = xmlFindCharEncodingHandler("UCS-4");
1581
17
            if (handler != NULL) return(handler);
1582
0
            handler = xmlFindCharEncodingHandler("UCS4");
1583
0
            if (handler != NULL) return(handler);
1584
0
      break;
1585
1
        case XML_CHAR_ENCODING_UCS4_2143:
1586
1
      break;
1587
1
        case XML_CHAR_ENCODING_UCS4_3412:
1588
1
      break;
1589
0
        case XML_CHAR_ENCODING_UCS2:
1590
0
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1591
0
            if (handler != NULL) return(handler);
1592
0
            handler = xmlFindCharEncodingHandler("UCS-2");
1593
0
            if (handler != NULL) return(handler);
1594
0
            handler = xmlFindCharEncodingHandler("UCS2");
1595
0
            if (handler != NULL) return(handler);
1596
0
      break;
1597
1598
      /*
1599
       * We used to keep ISO Latin encodings native in the
1600
       * generated data. This led to so many problems that
1601
       * this has been removed. One can still change this
1602
       * back by registering no-ops encoders for those
1603
       */
1604
0
        case XML_CHAR_ENCODING_8859_1:
1605
0
      handler = xmlFindCharEncodingHandler("ISO-8859-1");
1606
0
      if (handler != NULL) return(handler);
1607
0
      break;
1608
0
        case XML_CHAR_ENCODING_8859_2:
1609
0
      handler = xmlFindCharEncodingHandler("ISO-8859-2");
1610
0
      if (handler != NULL) return(handler);
1611
0
      break;
1612
0
        case XML_CHAR_ENCODING_8859_3:
1613
0
      handler = xmlFindCharEncodingHandler("ISO-8859-3");
1614
0
      if (handler != NULL) return(handler);
1615
0
      break;
1616
0
        case XML_CHAR_ENCODING_8859_4:
1617
0
      handler = xmlFindCharEncodingHandler("ISO-8859-4");
1618
0
      if (handler != NULL) return(handler);
1619
0
      break;
1620
0
        case XML_CHAR_ENCODING_8859_5:
1621
0
      handler = xmlFindCharEncodingHandler("ISO-8859-5");
1622
0
      if (handler != NULL) return(handler);
1623
0
      break;
1624
0
        case XML_CHAR_ENCODING_8859_6:
1625
0
      handler = xmlFindCharEncodingHandler("ISO-8859-6");
1626
0
      if (handler != NULL) return(handler);
1627
0
      break;
1628
0
        case XML_CHAR_ENCODING_8859_7:
1629
0
      handler = xmlFindCharEncodingHandler("ISO-8859-7");
1630
0
      if (handler != NULL) return(handler);
1631
0
      break;
1632
0
        case XML_CHAR_ENCODING_8859_8:
1633
0
      handler = xmlFindCharEncodingHandler("ISO-8859-8");
1634
0
      if (handler != NULL) return(handler);
1635
0
      break;
1636
0
        case XML_CHAR_ENCODING_8859_9:
1637
0
      handler = xmlFindCharEncodingHandler("ISO-8859-9");
1638
0
      if (handler != NULL) return(handler);
1639
0
      break;
1640
1641
1642
0
        case XML_CHAR_ENCODING_2022_JP:
1643
0
            handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1644
0
            if (handler != NULL) return(handler);
1645
0
      break;
1646
0
        case XML_CHAR_ENCODING_SHIFT_JIS:
1647
0
            handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1648
0
            if (handler != NULL) return(handler);
1649
0
            handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1650
0
            if (handler != NULL) return(handler);
1651
0
            handler = xmlFindCharEncodingHandler("Shift_JIS");
1652
0
            if (handler != NULL) return(handler);
1653
0
      break;
1654
0
        case XML_CHAR_ENCODING_EUC_JP:
1655
0
            handler = xmlFindCharEncodingHandler("EUC-JP");
1656
0
            if (handler != NULL) return(handler);
1657
0
      break;
1658
0
  default:
1659
0
      break;
1660
320k
    }
1661
1662
#ifdef DEBUG_ENCODING
1663
    xmlGenericError(xmlGenericErrorContext,
1664
      "No handler found for encoding %d\n", enc);
1665
#endif
1666
2
    return(NULL);
1667
320k
}
1668
1669
/**
1670
 * xmlFindCharEncodingHandler:
1671
 * @name:  a string describing the char encoding.
1672
 *
1673
 * Search in the registered set the handler able to read/write that encoding.
1674
 *
1675
 * Returns the handler or NULL if not found
1676
 */
1677
xmlCharEncodingHandlerPtr
1678
4.17k
xmlFindCharEncodingHandler(const char *name) {
1679
4.17k
    const char *nalias;
1680
4.17k
    const char *norig;
1681
4.17k
    xmlCharEncoding alias;
1682
4.17k
#ifdef LIBXML_ICONV_ENABLED
1683
4.17k
    xmlCharEncodingHandlerPtr enc;
1684
4.17k
    iconv_t icv_in, icv_out;
1685
4.17k
#endif /* LIBXML_ICONV_ENABLED */
1686
#ifdef LIBXML_ICU_ENABLED
1687
    xmlCharEncodingHandlerPtr encu;
1688
    uconv_t *ucv_in, *ucv_out;
1689
#endif /* LIBXML_ICU_ENABLED */
1690
4.17k
    char upper[100];
1691
4.17k
    int i;
1692
1693
4.17k
    if (handlers == NULL) xmlInitCharEncodingHandlers();
1694
4.17k
    if (name == NULL) return(xmlDefaultCharEncodingHandler);
1695
4.17k
    if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1696
1697
    /*
1698
     * Do the alias resolution
1699
     */
1700
4.17k
    norig = name;
1701
4.17k
    nalias = xmlGetEncodingAlias(name);
1702
4.17k
    if (nalias != NULL)
1703
0
  name = nalias;
1704
1705
    /*
1706
     * Check first for directly registered encoding names
1707
     */
1708
29.1k
    for (i = 0;i < 99;i++) {
1709
29.1k
        upper[i] = toupper(name[i]);
1710
29.1k
  if (upper[i] == 0) break;
1711
29.1k
    }
1712
4.17k
    upper[i] = 0;
1713
1714
4.17k
    if (handlers != NULL) {
1715
36.8k
        for (i = 0;i < nbCharEncodingHandler; i++) {
1716
32.8k
            if (!strcmp(upper, handlers[i]->name)) {
1717
#ifdef DEBUG_ENCODING
1718
                xmlGenericError(xmlGenericErrorContext,
1719
                        "Found registered handler for encoding %s\n", name);
1720
#endif
1721
192
                return(handlers[i]);
1722
192
            }
1723
32.8k
        }
1724
4.17k
    }
1725
1726
3.98k
#ifdef LIBXML_ICONV_ENABLED
1727
    /* check whether iconv can handle this */
1728
3.98k
    icv_in = iconv_open("UTF-8", name);
1729
3.98k
    icv_out = iconv_open(name, "UTF-8");
1730
3.98k
    if (icv_in == (iconv_t) -1) {
1731
482
        icv_in = iconv_open("UTF-8", upper);
1732
482
    }
1733
3.98k
    if (icv_out == (iconv_t) -1) {
1734
482
  icv_out = iconv_open(upper, "UTF-8");
1735
482
    }
1736
3.98k
    if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1737
3.50k
      enc = (xmlCharEncodingHandlerPtr)
1738
3.50k
            xmlMalloc(sizeof(xmlCharEncodingHandler));
1739
3.50k
      if (enc == NULL) {
1740
0
          iconv_close(icv_in);
1741
0
          iconv_close(icv_out);
1742
0
    return(NULL);
1743
0
      }
1744
3.50k
            memset(enc, 0, sizeof(xmlCharEncodingHandler));
1745
3.50k
      enc->name = xmlMemStrdup(name);
1746
3.50k
      enc->input = NULL;
1747
3.50k
      enc->output = NULL;
1748
3.50k
      enc->iconv_in = icv_in;
1749
3.50k
      enc->iconv_out = icv_out;
1750
#ifdef DEBUG_ENCODING
1751
            xmlGenericError(xmlGenericErrorContext,
1752
        "Found iconv handler for encoding %s\n", name);
1753
#endif
1754
3.50k
      return enc;
1755
3.50k
    } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1756
0
      xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1757
0
        "iconv : problems with filters for '%s'\n", name);
1758
0
      if (icv_in != (iconv_t) -1)
1759
0
    iconv_close(icv_in);
1760
0
      else
1761
0
    iconv_close(icv_out);
1762
0
    }
1763
482
#endif /* LIBXML_ICONV_ENABLED */
1764
#ifdef LIBXML_ICU_ENABLED
1765
    /* check whether icu can handle this */
1766
    ucv_in = openIcuConverter(name, 1);
1767
    ucv_out = openIcuConverter(name, 0);
1768
    if (ucv_in != NULL && ucv_out != NULL) {
1769
      encu = (xmlCharEncodingHandlerPtr)
1770
             xmlMalloc(sizeof(xmlCharEncodingHandler));
1771
      if (encu == NULL) {
1772
                closeIcuConverter(ucv_in);
1773
                closeIcuConverter(ucv_out);
1774
    return(NULL);
1775
      }
1776
            memset(encu, 0, sizeof(xmlCharEncodingHandler));
1777
      encu->name = xmlMemStrdup(name);
1778
      encu->input = NULL;
1779
      encu->output = NULL;
1780
      encu->uconv_in = ucv_in;
1781
      encu->uconv_out = ucv_out;
1782
#ifdef DEBUG_ENCODING
1783
            xmlGenericError(xmlGenericErrorContext,
1784
        "Found ICU converter handler for encoding %s\n", name);
1785
#endif
1786
      return encu;
1787
    } else if (ucv_in != NULL || ucv_out != NULL) {
1788
            closeIcuConverter(ucv_in);
1789
            closeIcuConverter(ucv_out);
1790
      xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1791
        "ICU converter : problems with filters for '%s'\n", name);
1792
    }
1793
#endif /* LIBXML_ICU_ENABLED */
1794
1795
#ifdef DEBUG_ENCODING
1796
    xmlGenericError(xmlGenericErrorContext,
1797
      "No handler found for encoding %s\n", name);
1798
#endif
1799
1800
    /*
1801
     * Fallback using the canonical names
1802
     */
1803
482
    alias = xmlParseCharEncoding(norig);
1804
482
    if (alias != XML_CHAR_ENCODING_ERROR) {
1805
29
        const char* canon;
1806
29
        canon = xmlGetCharEncodingName(alias);
1807
29
        if ((canon != NULL) && (strcmp(name, canon))) {
1808
5
      return(xmlFindCharEncodingHandler(canon));
1809
5
        }
1810
29
    }
1811
1812
    /* If "none of the above", give up */
1813
477
    return(NULL);
1814
482
}
1815
1816
/************************************************************************
1817
 *                  *
1818
 *    ICONV based generic conversion functions    *
1819
 *                  *
1820
 ************************************************************************/
1821
1822
#ifdef LIBXML_ICONV_ENABLED
1823
/**
1824
 * xmlIconvWrapper:
1825
 * @cd:   iconv converter data structure
1826
 * @out:  a pointer to an array of bytes to store the result
1827
 * @outlen:  the length of @out
1828
 * @in:  a pointer to an array of input bytes
1829
 * @inlen:  the length of @in
1830
 *
1831
 * Returns 0 if success, or
1832
 *     -1 by lack of space, or
1833
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
1834
 *        the result of transformation can't fit into the encoding we want), or
1835
 *     -3 if there the last byte can't form a single output char.
1836
 *
1837
 * The value of @inlen after return is the number of octets consumed
1838
 *     as the return value is positive, else unpredictable.
1839
 * The value of @outlen after return is the number of octets produced.
1840
 */
1841
static int
1842
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
1843
162k
                const unsigned char *in, int *inlen) {
1844
162k
    size_t icv_inlen, icv_outlen;
1845
162k
    const char *icv_in = (const char *) in;
1846
162k
    char *icv_out = (char *) out;
1847
162k
    size_t ret;
1848
1849
162k
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1850
0
        if (outlen != NULL) *outlen = 0;
1851
0
        return(-1);
1852
0
    }
1853
162k
    icv_inlen = *inlen;
1854
162k
    icv_outlen = *outlen;
1855
    /*
1856
     * Some versions take const, other versions take non-const input.
1857
     */
1858
162k
    ret = iconv(cd, (void *) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
1859
162k
    *inlen -= icv_inlen;
1860
162k
    *outlen -= icv_outlen;
1861
162k
    if ((icv_inlen != 0) || (ret == (size_t) -1)) {
1862
159k
#ifdef EILSEQ
1863
159k
        if (errno == EILSEQ) {
1864
154k
            return -2;
1865
154k
        } else
1866
5.67k
#endif
1867
5.67k
#ifdef E2BIG
1868
5.67k
        if (errno == E2BIG) {
1869
5.19k
            return -1;
1870
5.19k
        } else
1871
473
#endif
1872
473
#ifdef EINVAL
1873
473
        if (errno == EINVAL) {
1874
473
            return -3;
1875
473
        } else
1876
0
#endif
1877
0
        {
1878
0
            return -3;
1879
0
        }
1880
159k
    }
1881
3.16k
    return 0;
1882
162k
}
1883
#endif /* LIBXML_ICONV_ENABLED */
1884
1885
/************************************************************************
1886
 *                  *
1887
 *    ICU based generic conversion functions    *
1888
 *                  *
1889
 ************************************************************************/
1890
1891
#ifdef LIBXML_ICU_ENABLED
1892
/**
1893
 * xmlUconvWrapper:
1894
 * @cd: ICU uconverter data structure
1895
 * @toUnicode : non-zero if toUnicode. 0 otherwise.
1896
 * @out:  a pointer to an array of bytes to store the result
1897
 * @outlen:  the length of @out
1898
 * @in:  a pointer to an array of input bytes
1899
 * @inlen:  the length of @in
1900
 * @flush: if true, indicates end of input
1901
 *
1902
 * Returns 0 if success, or
1903
 *     -1 by lack of space, or
1904
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
1905
 *        the result of transformation can't fit into the encoding we want), or
1906
 *     -3 if there the last byte can't form a single output char.
1907
 *
1908
 * The value of @inlen after return is the number of octets consumed
1909
 *     as the return value is positive, else unpredictable.
1910
 * The value of @outlen after return is the number of octets produced.
1911
 */
1912
static int
1913
xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
1914
                const unsigned char *in, int *inlen, int flush) {
1915
    const char *ucv_in = (const char *) in;
1916
    char *ucv_out = (char *) out;
1917
    UErrorCode err = U_ZERO_ERROR;
1918
1919
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1920
        if (outlen != NULL) *outlen = 0;
1921
        return(-1);
1922
    }
1923
1924
    if (toUnicode) {
1925
        /* encoding => UTF-16 => UTF-8 */
1926
        ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
1927
                       &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1928
                       &cd->pivot_source, &cd->pivot_target,
1929
                       cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1930
    } else {
1931
        /* UTF-8 => UTF-16 => encoding */
1932
        ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
1933
                       &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1934
                       &cd->pivot_source, &cd->pivot_target,
1935
                       cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1936
    }
1937
    *inlen = ucv_in - (const char*) in;
1938
    *outlen = ucv_out - (char *) out;
1939
    if (U_SUCCESS(err)) {
1940
        /* reset pivot buf if this is the last call for input (flush==TRUE) */
1941
        if (flush)
1942
            cd->pivot_source = cd->pivot_target = cd->pivot_buf;
1943
        return 0;
1944
    }
1945
    if (err == U_BUFFER_OVERFLOW_ERROR)
1946
        return -1;
1947
    if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
1948
        return -2;
1949
    return -3;
1950
}
1951
#endif /* LIBXML_ICU_ENABLED */
1952
1953
/************************************************************************
1954
 *                  *
1955
 *    The real API used by libxml for on-the-fly conversion *
1956
 *                  *
1957
 ************************************************************************/
1958
1959
/**
1960
 * xmlEncInputChunk:
1961
 * @handler:  encoding handler
1962
 * @out:  a pointer to an array of bytes to store the result
1963
 * @outlen:  the length of @out
1964
 * @in:  a pointer to an array of input bytes
1965
 * @inlen:  the length of @in
1966
 * @flush:  flush (ICU-related)
1967
 *
1968
 * Returns 0 if success, or
1969
 *     -1 by lack of space, or
1970
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
1971
 *        the result of transformation can't fit into the encoding we want), or
1972
 *     -3 if there the last byte can't form a single output char.
1973
 *
1974
 * The value of @inlen after return is the number of octets consumed
1975
 *     as the return value is 0, else unpredictable.
1976
 * The value of @outlen after return is the number of octets produced.
1977
 */
1978
static int
1979
xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
1980
169k
                 int *outlen, const unsigned char *in, int *inlen, int flush) {
1981
169k
    int ret;
1982
169k
    (void)flush;
1983
1984
169k
    if (handler->input != NULL) {
1985
6.44k
        ret = handler->input(out, outlen, in, inlen);
1986
6.44k
        if (ret > 0)
1987
1.05k
           ret = 0;
1988
6.44k
    }
1989
162k
#ifdef LIBXML_ICONV_ENABLED
1990
162k
    else if (handler->iconv_in != NULL) {
1991
162k
        ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
1992
162k
    }
1993
1
#endif /* LIBXML_ICONV_ENABLED */
1994
#ifdef LIBXML_ICU_ENABLED
1995
    else if (handler->uconv_in != NULL) {
1996
        ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen,
1997
                              flush);
1998
    }
1999
#endif /* LIBXML_ICU_ENABLED */
2000
1
    else {
2001
1
        *outlen = 0;
2002
1
        *inlen = 0;
2003
1
        ret = -2;
2004
1
    }
2005
2006
169k
    return(ret);
2007
169k
}
2008
2009
/**
2010
 * xmlEncOutputChunk:
2011
 * @handler:  encoding handler
2012
 * @out:  a pointer to an array of bytes to store the result
2013
 * @outlen:  the length of @out
2014
 * @in:  a pointer to an array of input bytes
2015
 * @inlen:  the length of @in
2016
 *
2017
 * Returns 0 if success, or
2018
 *     -1 by lack of space, or
2019
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2020
 *        the result of transformation can't fit into the encoding we want), or
2021
 *     -3 if there the last byte can't form a single output char.
2022
 *     -4 if no output function was found.
2023
 *
2024
 * The value of @inlen after return is the number of octets consumed
2025
 *     as the return value is 0, else unpredictable.
2026
 * The value of @outlen after return is the number of octets produced.
2027
 */
2028
static int
2029
xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2030
0
                  int *outlen, const unsigned char *in, int *inlen) {
2031
0
    int ret;
2032
2033
0
    if (handler->output != NULL) {
2034
0
        ret = handler->output(out, outlen, in, inlen);
2035
0
        if (ret > 0)
2036
0
           ret = 0;
2037
0
    }
2038
0
#ifdef LIBXML_ICONV_ENABLED
2039
0
    else if (handler->iconv_out != NULL) {
2040
0
        ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
2041
0
    }
2042
0
#endif /* LIBXML_ICONV_ENABLED */
2043
#ifdef LIBXML_ICU_ENABLED
2044
    else if (handler->uconv_out != NULL) {
2045
        ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen,
2046
                              1);
2047
    }
2048
#endif /* LIBXML_ICU_ENABLED */
2049
0
    else {
2050
0
        *outlen = 0;
2051
0
        *inlen = 0;
2052
0
        ret = -4;
2053
0
    }
2054
2055
0
    return(ret);
2056
0
}
2057
2058
/**
2059
 * xmlCharEncFirstLineInt:
2060
 * @handler:  char encoding transformation data structure
2061
 * @out:  an xmlBuffer for the output.
2062
 * @in:  an xmlBuffer for the input
2063
 * @len:  number of bytes to convert for the first line, or -1
2064
 *
2065
 * Front-end for the encoding handler input function, but handle only
2066
 * the very first line, i.e. limit itself to 45 chars.
2067
 *
2068
 * Returns the number of byte written if success, or
2069
 *     -1 general error
2070
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2071
 *        the result of transformation can't fit into the encoding we want), or
2072
 */
2073
int
2074
xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2075
0
                       xmlBufferPtr in, int len) {
2076
0
    int ret;
2077
0
    int written;
2078
0
    int toconv;
2079
2080
0
    if (handler == NULL) return(-1);
2081
0
    if (out == NULL) return(-1);
2082
0
    if (in == NULL) return(-1);
2083
2084
    /* calculate space available */
2085
0
    written = out->size - out->use - 1; /* count '\0' */
2086
0
    toconv = in->use;
2087
    /*
2088
     * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2089
     * 45 chars should be sufficient to reach the end of the encoding
2090
     * declaration without going too far inside the document content.
2091
     * on UTF-16 this means 90bytes, on UCS4 this means 180
2092
     * The actual value depending on guessed encoding is passed as @len
2093
     * if provided
2094
     */
2095
0
    if (len >= 0) {
2096
0
        if (toconv > len)
2097
0
            toconv = len;
2098
0
    } else {
2099
0
        if (toconv > 180)
2100
0
            toconv = 180;
2101
0
    }
2102
0
    if (toconv * 2 >= written) {
2103
0
        xmlBufferGrow(out, toconv * 2);
2104
0
  written = out->size - out->use - 1;
2105
0
    }
2106
2107
0
    ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2108
0
                           in->content, &toconv, 0);
2109
0
    xmlBufferShrink(in, toconv);
2110
0
    out->use += written;
2111
0
    out->content[out->use] = 0;
2112
0
    if (ret == -1) ret = -3;
2113
2114
#ifdef DEBUG_ENCODING
2115
    switch (ret) {
2116
        case 0:
2117
      xmlGenericError(xmlGenericErrorContext,
2118
        "converted %d bytes to %d bytes of input\n",
2119
              toconv, written);
2120
      break;
2121
        case -1:
2122
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2123
              toconv, written, in->use);
2124
      break;
2125
        case -2:
2126
      xmlGenericError(xmlGenericErrorContext,
2127
        "input conversion failed due to input error\n");
2128
      break;
2129
        case -3:
2130
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2131
              toconv, written, in->use);
2132
      break;
2133
  default:
2134
      xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2135
    }
2136
#endif /* DEBUG_ENCODING */
2137
    /*
2138
     * Ignore when input buffer is not on a boundary
2139
     */
2140
0
    if (ret == -3) ret = 0;
2141
0
    if (ret == -1) ret = 0;
2142
0
    return(written ? written : ret);
2143
0
}
2144
2145
/**
2146
 * xmlCharEncFirstLine:
2147
 * @handler:  char encoding transformation data structure
2148
 * @out:  an xmlBuffer for the output.
2149
 * @in:  an xmlBuffer for the input
2150
 *
2151
 * Front-end for the encoding handler input function, but handle only
2152
 * the very first line, i.e. limit itself to 45 chars.
2153
 *
2154
 * Returns the number of byte written if success, or
2155
 *     -1 general error
2156
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2157
 *        the result of transformation can't fit into the encoding we want), or
2158
 */
2159
int
2160
xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2161
0
                 xmlBufferPtr in) {
2162
0
    return(xmlCharEncFirstLineInt(handler, out, in, -1));
2163
0
}
2164
2165
/**
2166
 * xmlCharEncFirstLineInput:
2167
 * @input: a parser input buffer
2168
 * @len:  number of bytes to convert for the first line, or -1
2169
 *
2170
 * Front-end for the encoding handler input function, but handle only
2171
 * the very first line. Point is that this is based on autodetection
2172
 * of the encoding and once that first line is converted we may find
2173
 * out that a different decoder is needed to process the input.
2174
 *
2175
 * Returns the number of byte written if success, or
2176
 *     -1 general error
2177
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2178
 *        the result of transformation can't fit into the encoding we want), or
2179
 */
2180
int
2181
xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
2182
4.32k
{
2183
4.32k
    int ret;
2184
4.32k
    size_t written;
2185
4.32k
    size_t toconv;
2186
4.32k
    int c_in;
2187
4.32k
    int c_out;
2188
4.32k
    xmlBufPtr in;
2189
4.32k
    xmlBufPtr out;
2190
2191
4.32k
    if ((input == NULL) || (input->encoder == NULL) ||
2192
4.32k
        (input->buffer == NULL) || (input->raw == NULL))
2193
0
        return (-1);
2194
4.32k
    out = input->buffer;
2195
4.32k
    in = input->raw;
2196
2197
4.32k
    toconv = xmlBufUse(in);
2198
4.32k
    if (toconv == 0)
2199
10
        return (0);
2200
4.31k
    written = xmlBufAvail(out);
2201
    /*
2202
     * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2203
     * 45 chars should be sufficient to reach the end of the encoding
2204
     * declaration without going too far inside the document content.
2205
     * on UTF-16 this means 90bytes, on UCS4 this means 180
2206
     * The actual value depending on guessed encoding is passed as @len
2207
     * if provided
2208
     */
2209
4.31k
    if (len >= 0) {
2210
670
        if (toconv > (unsigned int) len)
2211
167
            toconv = len;
2212
3.64k
    } else {
2213
3.64k
        if (toconv > 180)
2214
2.20k
            toconv = 180;
2215
3.64k
    }
2216
4.31k
    if (toconv * 2 >= written) {
2217
0
        xmlBufGrow(out, toconv * 2);
2218
0
        written = xmlBufAvail(out);
2219
0
    }
2220
4.31k
    if (written > 360)
2221
4.31k
        written = 360;
2222
2223
4.31k
    c_in = toconv;
2224
4.31k
    c_out = written;
2225
4.31k
    ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2226
4.31k
                           xmlBufContent(in), &c_in, 0);
2227
4.31k
    xmlBufShrink(in, c_in);
2228
4.31k
    xmlBufAddLen(out, c_out);
2229
4.31k
    if (ret == -1)
2230
2.16k
        ret = -3;
2231
2232
4.31k
    switch (ret) {
2233
1.82k
        case 0:
2234
#ifdef DEBUG_ENCODING
2235
            xmlGenericError(xmlGenericErrorContext,
2236
                            "converted %d bytes to %d bytes of input\n",
2237
                            c_in, c_out);
2238
#endif
2239
1.82k
            break;
2240
0
        case -1:
2241
#ifdef DEBUG_ENCODING
2242
            xmlGenericError(xmlGenericErrorContext,
2243
                         "converted %d bytes to %d bytes of input, %d left\n",
2244
                            c_in, c_out, (int)xmlBufUse(in));
2245
#endif
2246
0
            break;
2247
2.17k
        case -3:
2248
#ifdef DEBUG_ENCODING
2249
            xmlGenericError(xmlGenericErrorContext,
2250
                        "converted %d bytes to %d bytes of input, %d left\n",
2251
                            c_in, c_out, (int)xmlBufUse(in));
2252
#endif
2253
2.17k
            break;
2254
314
        case -2: {
2255
314
            char buf[50];
2256
314
            const xmlChar *content = xmlBufContent(in);
2257
2258
314
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2259
314
         content[0], content[1],
2260
314
         content[2], content[3]);
2261
314
      buf[49] = 0;
2262
314
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2263
314
        "input conversion failed due to input error, bytes %s\n",
2264
314
               buf);
2265
314
        }
2266
4.31k
    }
2267
    /*
2268
     * Ignore when input buffer is not on a boundary
2269
     */
2270
4.31k
    if (ret == -3) ret = 0;
2271
4.31k
    if (ret == -1) ret = 0;
2272
4.31k
    return(c_out ? c_out : ret);
2273
4.31k
}
2274
2275
/**
2276
 * xmlCharEncInput:
2277
 * @input: a parser input buffer
2278
 * @flush: try to flush all the raw buffer
2279
 *
2280
 * Generic front-end for the encoding handler on parser input
2281
 *
2282
 * Returns the number of byte written if success, or
2283
 *     -1 general error
2284
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2285
 *        the result of transformation can't fit into the encoding we want), or
2286
 */
2287
int
2288
xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
2289
374k
{
2290
374k
    int ret;
2291
374k
    size_t written;
2292
374k
    size_t toconv;
2293
374k
    int c_in;
2294
374k
    int c_out;
2295
374k
    xmlBufPtr in;
2296
374k
    xmlBufPtr out;
2297
2298
374k
    if ((input == NULL) || (input->encoder == NULL) ||
2299
374k
        (input->buffer == NULL) || (input->raw == NULL))
2300
0
        return (-1);
2301
374k
    out = input->buffer;
2302
374k
    in = input->raw;
2303
2304
374k
    toconv = xmlBufUse(in);
2305
374k
    if (toconv == 0)
2306
209k
        return (0);
2307
165k
    if ((toconv > 64 * 1024) && (flush == 0))
2308
0
        toconv = 64 * 1024;
2309
165k
    written = xmlBufAvail(out);
2310
165k
    if (toconv * 2 >= written) {
2311
1.18k
        xmlBufGrow(out, toconv * 2);
2312
1.18k
        written = xmlBufAvail(out);
2313
1.18k
    }
2314
165k
    if ((written > 128 * 1024) && (flush == 0))
2315
0
        written = 128 * 1024;
2316
2317
165k
    c_in = toconv;
2318
165k
    c_out = written;
2319
165k
    ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2320
165k
                           xmlBufContent(in), &c_in, flush);
2321
165k
    xmlBufShrink(in, c_in);
2322
165k
    xmlBufAddLen(out, c_out);
2323
165k
    if (ret == -1)
2324
3.84k
        ret = -3;
2325
2326
165k
    switch (ret) {
2327
6.91k
        case 0:
2328
#ifdef DEBUG_ENCODING
2329
            xmlGenericError(xmlGenericErrorContext,
2330
                            "converted %d bytes to %d bytes of input\n",
2331
                            c_in, c_out);
2332
#endif
2333
6.91k
            break;
2334
0
        case -1:
2335
#ifdef DEBUG_ENCODING
2336
            xmlGenericError(xmlGenericErrorContext,
2337
                         "converted %d bytes to %d bytes of input, %d left\n",
2338
                            c_in, c_out, (int)xmlBufUse(in));
2339
#endif
2340
0
            break;
2341
4.31k
        case -3:
2342
#ifdef DEBUG_ENCODING
2343
            xmlGenericError(xmlGenericErrorContext,
2344
                        "converted %d bytes to %d bytes of input, %d left\n",
2345
                            c_in, c_out, (int)xmlBufUse(in));
2346
#endif
2347
4.31k
            break;
2348
153k
        case -2: {
2349
153k
            char buf[50];
2350
153k
            const xmlChar *content = xmlBufContent(in);
2351
2352
153k
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2353
153k
         content[0], content[1],
2354
153k
         content[2], content[3]);
2355
153k
      buf[49] = 0;
2356
153k
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2357
153k
        "input conversion failed due to input error, bytes %s\n",
2358
153k
               buf);
2359
153k
        }
2360
165k
    }
2361
    /*
2362
     * Ignore when input buffer is not on a boundary
2363
     */
2364
165k
    if (ret == -3)
2365
4.31k
        ret = 0;
2366
165k
    return (c_out? c_out : ret);
2367
165k
}
2368
2369
/**
2370
 * xmlCharEncInFunc:
2371
 * @handler:  char encoding transformation data structure
2372
 * @out:  an xmlBuffer for the output.
2373
 * @in:  an xmlBuffer for the input
2374
 *
2375
 * Generic front-end for the encoding handler input function
2376
 *
2377
 * Returns the number of byte written if success, or
2378
 *     -1 general error
2379
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2380
 *        the result of transformation can't fit into the encoding we want), or
2381
 */
2382
int
2383
xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2384
                 xmlBufferPtr in)
2385
0
{
2386
0
    int ret;
2387
0
    int written;
2388
0
    int toconv;
2389
2390
0
    if (handler == NULL)
2391
0
        return (-1);
2392
0
    if (out == NULL)
2393
0
        return (-1);
2394
0
    if (in == NULL)
2395
0
        return (-1);
2396
2397
0
    toconv = in->use;
2398
0
    if (toconv == 0)
2399
0
        return (0);
2400
0
    written = out->size - out->use -1; /* count '\0' */
2401
0
    if (toconv * 2 >= written) {
2402
0
        xmlBufferGrow(out, out->size + toconv * 2);
2403
0
        written = out->size - out->use - 1;
2404
0
    }
2405
0
    ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2406
0
                           in->content, &toconv, 1);
2407
0
    xmlBufferShrink(in, toconv);
2408
0
    out->use += written;
2409
0
    out->content[out->use] = 0;
2410
0
    if (ret == -1)
2411
0
        ret = -3;
2412
2413
0
    switch (ret) {
2414
0
        case 0:
2415
#ifdef DEBUG_ENCODING
2416
            xmlGenericError(xmlGenericErrorContext,
2417
                            "converted %d bytes to %d bytes of input\n",
2418
                            toconv, written);
2419
#endif
2420
0
            break;
2421
0
        case -1:
2422
#ifdef DEBUG_ENCODING
2423
            xmlGenericError(xmlGenericErrorContext,
2424
                         "converted %d bytes to %d bytes of input, %d left\n",
2425
                            toconv, written, in->use);
2426
#endif
2427
0
            break;
2428
0
        case -3:
2429
#ifdef DEBUG_ENCODING
2430
            xmlGenericError(xmlGenericErrorContext,
2431
                        "converted %d bytes to %d bytes of input, %d left\n",
2432
                            toconv, written, in->use);
2433
#endif
2434
0
            break;
2435
0
        case -2: {
2436
0
            char buf[50];
2437
2438
0
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2439
0
         in->content[0], in->content[1],
2440
0
         in->content[2], in->content[3]);
2441
0
      buf[49] = 0;
2442
0
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2443
0
        "input conversion failed due to input error, bytes %s\n",
2444
0
               buf);
2445
0
        }
2446
0
    }
2447
    /*
2448
     * Ignore when input buffer is not on a boundary
2449
     */
2450
0
    if (ret == -3)
2451
0
        ret = 0;
2452
0
    return (written? written : ret);
2453
0
}
2454
2455
#ifdef LIBXML_OUTPUT_ENABLED
2456
/**
2457
 * xmlCharEncOutput:
2458
 * @output: a parser output buffer
2459
 * @init: is this an initialization call without data
2460
 *
2461
 * Generic front-end for the encoding handler on parser output
2462
 * a first call with @init == 1 has to be made first to initiate the
2463
 * output in case of non-stateless encoding needing to initiate their
2464
 * state or the output (like the BOM in UTF16).
2465
 * In case of UTF8 sequence conversion errors for the given encoder,
2466
 * the content will be automatically remapped to a CharRef sequence.
2467
 *
2468
 * Returns the number of byte written if success, or
2469
 *     -1 general error
2470
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2471
 *        the result of transformation can't fit into the encoding we want), or
2472
 */
2473
int
2474
xmlCharEncOutput(xmlOutputBufferPtr output, int init)
2475
0
{
2476
0
    int ret;
2477
0
    size_t written;
2478
0
    int writtentot = 0;
2479
0
    size_t toconv;
2480
0
    int c_in;
2481
0
    int c_out;
2482
0
    xmlBufPtr in;
2483
0
    xmlBufPtr out;
2484
2485
0
    if ((output == NULL) || (output->encoder == NULL) ||
2486
0
        (output->buffer == NULL) || (output->conv == NULL))
2487
0
        return (-1);
2488
0
    out = output->conv;
2489
0
    in = output->buffer;
2490
2491
0
retry:
2492
2493
0
    written = xmlBufAvail(out);
2494
2495
    /*
2496
     * First specific handling of the initialization call
2497
     */
2498
0
    if (init) {
2499
0
        c_in = 0;
2500
0
        c_out = written;
2501
        /* TODO: Check return value. */
2502
0
        xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2503
0
                          NULL, &c_in);
2504
0
        xmlBufAddLen(out, c_out);
2505
#ifdef DEBUG_ENCODING
2506
  xmlGenericError(xmlGenericErrorContext,
2507
    "initialized encoder\n");
2508
#endif
2509
0
        return(c_out);
2510
0
    }
2511
2512
    /*
2513
     * Conversion itself.
2514
     */
2515
0
    toconv = xmlBufUse(in);
2516
0
    if (toconv == 0)
2517
0
        return (writtentot);
2518
0
    if (toconv > 64 * 1024)
2519
0
        toconv = 64 * 1024;
2520
0
    if (toconv * 4 >= written) {
2521
0
        xmlBufGrow(out, toconv * 4);
2522
0
        written = xmlBufAvail(out);
2523
0
    }
2524
0
    if (written > 256 * 1024)
2525
0
        written = 256 * 1024;
2526
2527
0
    c_in = toconv;
2528
0
    c_out = written;
2529
0
    ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2530
0
                            xmlBufContent(in), &c_in);
2531
0
    xmlBufShrink(in, c_in);
2532
0
    xmlBufAddLen(out, c_out);
2533
0
    writtentot += c_out;
2534
0
    if (ret == -1) {
2535
0
        if (c_out > 0) {
2536
            /* Can be a limitation of iconv or uconv */
2537
0
            goto retry;
2538
0
        }
2539
0
        ret = -3;
2540
0
    }
2541
2542
    /*
2543
     * Attempt to handle error cases
2544
     */
2545
0
    switch (ret) {
2546
0
        case 0:
2547
#ifdef DEBUG_ENCODING
2548
      xmlGenericError(xmlGenericErrorContext,
2549
        "converted %d bytes to %d bytes of output\n",
2550
              c_in, c_out);
2551
#endif
2552
0
      break;
2553
0
        case -1:
2554
#ifdef DEBUG_ENCODING
2555
      xmlGenericError(xmlGenericErrorContext,
2556
        "output conversion failed by lack of space\n");
2557
#endif
2558
0
      break;
2559
0
        case -3:
2560
#ifdef DEBUG_ENCODING
2561
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2562
              c_in, c_out, (int) xmlBufUse(in));
2563
#endif
2564
0
      break;
2565
0
        case -4:
2566
0
            xmlEncodingErr(XML_I18N_NO_OUTPUT,
2567
0
                           "xmlCharEncOutFunc: no output function !\n", NULL);
2568
0
            ret = -1;
2569
0
            break;
2570
0
        case -2: {
2571
0
      xmlChar charref[20];
2572
0
      int len = (int) xmlBufUse(in);
2573
0
            xmlChar *content = xmlBufContent(in);
2574
0
      int cur, charrefLen;
2575
2576
0
      cur = xmlGetUTF8Char(content, &len);
2577
0
      if (cur <= 0)
2578
0
                break;
2579
2580
#ifdef DEBUG_ENCODING
2581
            xmlGenericError(xmlGenericErrorContext,
2582
                    "handling output conversion error\n");
2583
            xmlGenericError(xmlGenericErrorContext,
2584
                    "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2585
                    content[0], content[1],
2586
                    content[2], content[3]);
2587
#endif
2588
            /*
2589
             * Removes the UTF8 sequence, and replace it by a charref
2590
             * and continue the transcoding phase, hoping the error
2591
             * did not mangle the encoder state.
2592
             */
2593
0
            charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2594
0
                             "&#%d;", cur);
2595
0
            xmlBufShrink(in, len);
2596
0
            xmlBufGrow(out, charrefLen * 4);
2597
0
            c_out = xmlBufAvail(out);
2598
0
            c_in = charrefLen;
2599
0
            ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2600
0
                                    charref, &c_in);
2601
2602
0
      if ((ret < 0) || (c_in != charrefLen)) {
2603
0
    char buf[50];
2604
2605
0
    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2606
0
       content[0], content[1],
2607
0
       content[2], content[3]);
2608
0
    buf[49] = 0;
2609
0
    xmlEncodingErr(XML_I18N_CONV_FAILED,
2610
0
        "output conversion failed due to conv error, bytes %s\n",
2611
0
             buf);
2612
0
    if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE)
2613
0
        content[0] = ' ';
2614
0
                break;
2615
0
      }
2616
2617
0
            xmlBufAddLen(out, c_out);
2618
0
            writtentot += c_out;
2619
0
            goto retry;
2620
0
  }
2621
0
    }
2622
0
    return(writtentot ? writtentot : ret);
2623
0
}
2624
#endif
2625
2626
/**
2627
 * xmlCharEncOutFunc:
2628
 * @handler:  char encoding transformation data structure
2629
 * @out:  an xmlBuffer for the output.
2630
 * @in:  an xmlBuffer for the input
2631
 *
2632
 * Generic front-end for the encoding handler output function
2633
 * a first call with @in == NULL has to be made firs to initiate the
2634
 * output in case of non-stateless encoding needing to initiate their
2635
 * state or the output (like the BOM in UTF16).
2636
 * In case of UTF8 sequence conversion errors for the given encoder,
2637
 * the content will be automatically remapped to a CharRef sequence.
2638
 *
2639
 * Returns the number of byte written if success, or
2640
 *     -1 general error
2641
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2642
 *        the result of transformation can't fit into the encoding we want), or
2643
 */
2644
int
2645
xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2646
0
                  xmlBufferPtr in) {
2647
0
    int ret;
2648
0
    int written;
2649
0
    int writtentot = 0;
2650
0
    int toconv;
2651
2652
0
    if (handler == NULL) return(-1);
2653
0
    if (out == NULL) return(-1);
2654
2655
0
retry:
2656
2657
0
    written = out->size - out->use;
2658
2659
0
    if (written > 0)
2660
0
  written--; /* Gennady: count '/0' */
2661
2662
    /*
2663
     * First specific handling of in = NULL, i.e. the initialization call
2664
     */
2665
0
    if (in == NULL) {
2666
0
        toconv = 0;
2667
        /* TODO: Check return value. */
2668
0
        xmlEncOutputChunk(handler, &out->content[out->use], &written,
2669
0
                          NULL, &toconv);
2670
0
        out->use += written;
2671
0
        out->content[out->use] = 0;
2672
#ifdef DEBUG_ENCODING
2673
  xmlGenericError(xmlGenericErrorContext,
2674
    "initialized encoder\n");
2675
#endif
2676
0
        return(0);
2677
0
    }
2678
2679
    /*
2680
     * Conversion itself.
2681
     */
2682
0
    toconv = in->use;
2683
0
    if (toconv == 0)
2684
0
  return(0);
2685
0
    if (toconv * 4 >= written) {
2686
0
        xmlBufferGrow(out, toconv * 4);
2687
0
  written = out->size - out->use - 1;
2688
0
    }
2689
0
    ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2690
0
                            in->content, &toconv);
2691
0
    xmlBufferShrink(in, toconv);
2692
0
    out->use += written;
2693
0
    writtentot += written;
2694
0
    out->content[out->use] = 0;
2695
0
    if (ret == -1) {
2696
0
        if (written > 0) {
2697
            /* Can be a limitation of iconv or uconv */
2698
0
            goto retry;
2699
0
        }
2700
0
        ret = -3;
2701
0
    }
2702
2703
    /*
2704
     * Attempt to handle error cases
2705
     */
2706
0
    switch (ret) {
2707
0
        case 0:
2708
#ifdef DEBUG_ENCODING
2709
      xmlGenericError(xmlGenericErrorContext,
2710
        "converted %d bytes to %d bytes of output\n",
2711
              toconv, written);
2712
#endif
2713
0
      break;
2714
0
        case -1:
2715
#ifdef DEBUG_ENCODING
2716
      xmlGenericError(xmlGenericErrorContext,
2717
        "output conversion failed by lack of space\n");
2718
#endif
2719
0
      break;
2720
0
        case -3:
2721
#ifdef DEBUG_ENCODING
2722
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2723
              toconv, written, in->use);
2724
#endif
2725
0
      break;
2726
0
        case -4:
2727
0
      xmlEncodingErr(XML_I18N_NO_OUTPUT,
2728
0
               "xmlCharEncOutFunc: no output function !\n", NULL);
2729
0
      ret = -1;
2730
0
            break;
2731
0
        case -2: {
2732
0
      xmlChar charref[20];
2733
0
      int len = in->use;
2734
0
      const xmlChar *utf = (const xmlChar *) in->content;
2735
0
      int cur, charrefLen;
2736
2737
0
      cur = xmlGetUTF8Char(utf, &len);
2738
0
      if (cur <= 0)
2739
0
                break;
2740
2741
#ifdef DEBUG_ENCODING
2742
            xmlGenericError(xmlGenericErrorContext,
2743
                    "handling output conversion error\n");
2744
            xmlGenericError(xmlGenericErrorContext,
2745
                    "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2746
                    in->content[0], in->content[1],
2747
                    in->content[2], in->content[3]);
2748
#endif
2749
            /*
2750
             * Removes the UTF8 sequence, and replace it by a charref
2751
             * and continue the transcoding phase, hoping the error
2752
             * did not mangle the encoder state.
2753
             */
2754
0
            charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2755
0
                             "&#%d;", cur);
2756
0
            xmlBufferShrink(in, len);
2757
0
            xmlBufferGrow(out, charrefLen * 4);
2758
0
      written = out->size - out->use - 1;
2759
0
            toconv = charrefLen;
2760
0
            ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2761
0
                                    charref, &toconv);
2762
2763
0
      if ((ret < 0) || (toconv != charrefLen)) {
2764
0
    char buf[50];
2765
2766
0
    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2767
0
       in->content[0], in->content[1],
2768
0
       in->content[2], in->content[3]);
2769
0
    buf[49] = 0;
2770
0
    xmlEncodingErr(XML_I18N_CONV_FAILED,
2771
0
        "output conversion failed due to conv error, bytes %s\n",
2772
0
             buf);
2773
0
    if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2774
0
        in->content[0] = ' ';
2775
0
          break;
2776
0
      }
2777
2778
0
            out->use += written;
2779
0
            writtentot += written;
2780
0
            out->content[out->use] = 0;
2781
0
            goto retry;
2782
0
  }
2783
0
    }
2784
0
    return(writtentot ? writtentot : ret);
2785
0
}
2786
2787
/**
2788
 * xmlCharEncCloseFunc:
2789
 * @handler:  char encoding transformation data structure
2790
 *
2791
 * Generic front-end for encoding handler close function
2792
 *
2793
 * Returns 0 if success, or -1 in case of error
2794
 */
2795
int
2796
4.32k
xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2797
4.32k
    int ret = 0;
2798
4.32k
    int tofree = 0;
2799
4.32k
    int i, handler_in_list = 0;
2800
2801
    /* Avoid unused variable warning if features are disabled. */
2802
4.32k
    (void) handler_in_list;
2803
2804
4.32k
    if (handler == NULL) return(-1);
2805
4.32k
    if (handler->name == NULL) return(-1);
2806
4.32k
    if (handlers != NULL) {
2807
33.9k
        for (i = 0;i < nbCharEncodingHandler; i++) {
2808
30.4k
            if (handler == handlers[i]) {
2809
819
          handler_in_list = 1;
2810
819
    break;
2811
819
      }
2812
30.4k
  }
2813
4.32k
    }
2814
4.32k
#ifdef LIBXML_ICONV_ENABLED
2815
    /*
2816
     * Iconv handlers can be used only once, free the whole block.
2817
     * and the associated icon resources.
2818
     */
2819
4.32k
    if ((handler_in_list == 0) &&
2820
4.32k
        ((handler->iconv_out != NULL) || (handler->iconv_in != NULL))) {
2821
3.50k
        tofree = 1;
2822
3.50k
  if (handler->iconv_out != NULL) {
2823
3.50k
      if (iconv_close(handler->iconv_out))
2824
0
    ret = -1;
2825
3.50k
      handler->iconv_out = NULL;
2826
3.50k
  }
2827
3.50k
  if (handler->iconv_in != NULL) {
2828
3.50k
      if (iconv_close(handler->iconv_in))
2829
0
    ret = -1;
2830
3.50k
      handler->iconv_in = NULL;
2831
3.50k
  }
2832
3.50k
    }
2833
4.32k
#endif /* LIBXML_ICONV_ENABLED */
2834
#ifdef LIBXML_ICU_ENABLED
2835
    if ((handler_in_list == 0) &&
2836
        ((handler->uconv_out != NULL) || (handler->uconv_in != NULL))) {
2837
        tofree = 1;
2838
  if (handler->uconv_out != NULL) {
2839
      closeIcuConverter(handler->uconv_out);
2840
      handler->uconv_out = NULL;
2841
  }
2842
  if (handler->uconv_in != NULL) {
2843
      closeIcuConverter(handler->uconv_in);
2844
      handler->uconv_in = NULL;
2845
  }
2846
    }
2847
#endif
2848
4.32k
    if (tofree) {
2849
        /* free up only dynamic handlers iconv/uconv */
2850
3.50k
        if (handler->name != NULL)
2851
3.50k
            xmlFree(handler->name);
2852
3.50k
        handler->name = NULL;
2853
3.50k
        xmlFree(handler);
2854
3.50k
    }
2855
#ifdef DEBUG_ENCODING
2856
    if (ret)
2857
        xmlGenericError(xmlGenericErrorContext,
2858
    "failed to close the encoding handler\n");
2859
    else
2860
        xmlGenericError(xmlGenericErrorContext,
2861
    "closed the encoding handler\n");
2862
#endif
2863
2864
4.32k
    return(ret);
2865
4.32k
}
2866
2867
/**
2868
 * xmlByteConsumed:
2869
 * @ctxt: an XML parser context
2870
 *
2871
 * This function provides the current index of the parser relative
2872
 * to the start of the current entity. This function is computed in
2873
 * bytes from the beginning starting at zero and finishing at the
2874
 * size in byte of the file if parsing a file. The function is
2875
 * of constant cost if the input is UTF-8 but can be costly if run
2876
 * on non-UTF-8 input.
2877
 *
2878
 * Returns the index in bytes from the beginning of the entity or -1
2879
 *         in case the index could not be computed.
2880
 */
2881
long
2882
0
xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2883
0
    xmlParserInputPtr in;
2884
2885
0
    if (ctxt == NULL) return(-1);
2886
0
    in = ctxt->input;
2887
0
    if (in == NULL)  return(-1);
2888
0
    if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2889
0
        unsigned int unused = 0;
2890
0
  xmlCharEncodingHandler * handler = in->buf->encoder;
2891
        /*
2892
   * Encoding conversion, compute the number of unused original
2893
   * bytes from the input not consumed and subtract that from
2894
   * the raw consumed value, this is not a cheap operation
2895
   */
2896
0
        if (in->end - in->cur > 0) {
2897
0
      unsigned char convbuf[32000];
2898
0
      const unsigned char *cur = (const unsigned char *)in->cur;
2899
0
      int toconv = in->end - in->cur, written = 32000;
2900
2901
0
      int ret;
2902
2903
0
            do {
2904
0
                toconv = in->end - cur;
2905
0
                written = 32000;
2906
0
                ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2907
0
                                        cur, &toconv);
2908
0
                if (ret < 0) {
2909
0
                    if (written > 0)
2910
0
                        ret = -2;
2911
0
                    else
2912
0
                        return(-1);
2913
0
                }
2914
0
                unused += written;
2915
0
                cur += toconv;
2916
0
            } while (ret == -2);
2917
0
  }
2918
0
  if (in->buf->rawconsumed < unused)
2919
0
      return(-1);
2920
0
  return(in->buf->rawconsumed - unused);
2921
0
    }
2922
0
    return(in->consumed + (in->cur - in->base));
2923
0
}
2924
2925
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2926
#ifdef LIBXML_ISO8859X_ENABLED
2927
2928
/**
2929
 * UTF8ToISO8859x:
2930
 * @out:  a pointer to an array of bytes to store the result
2931
 * @outlen:  the length of @out
2932
 * @in:  a pointer to an array of UTF-8 chars
2933
 * @inlen:  the length of @in
2934
 * @xlattable: the 2-level transcoding table
2935
 *
2936
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
2937
 * block of chars out.
2938
 *
2939
 * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
2940
 * The value of @inlen after return is the number of octets consumed
2941
 *     as the return value is positive, else unpredictable.
2942
 * The value of @outlen after return is the number of octets consumed.
2943
 */
2944
static int
2945
UTF8ToISO8859x(unsigned char* out, int *outlen,
2946
              const unsigned char* in, int *inlen,
2947
              const unsigned char* const xlattable) {
2948
    const unsigned char* outstart = out;
2949
    const unsigned char* inend;
2950
    const unsigned char* instart = in;
2951
    const unsigned char* processed = in;
2952
2953
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
2954
        (xlattable == NULL))
2955
  return(-1);
2956
    if (in == NULL) {
2957
        /*
2958
        * initialization nothing to do
2959
        */
2960
        *outlen = 0;
2961
        *inlen = 0;
2962
        return(0);
2963
    }
2964
    inend = in + (*inlen);
2965
    while (in < inend) {
2966
        unsigned char d = *in++;
2967
        if  (d < 0x80)  {
2968
            *out++ = d;
2969
        } else if (d < 0xC0) {
2970
            /* trailing byte in leading position */
2971
            *outlen = out - outstart;
2972
            *inlen = processed - instart;
2973
            return(-2);
2974
        } else if (d < 0xE0) {
2975
            unsigned char c;
2976
            if (!(in < inend)) {
2977
                /* trailing byte not in input buffer */
2978
                *outlen = out - outstart;
2979
                *inlen = processed - instart;
2980
                return(-3);
2981
            }
2982
            c = *in++;
2983
            if ((c & 0xC0) != 0x80) {
2984
                /* not a trailing byte */
2985
                *outlen = out - outstart;
2986
                *inlen = processed - instart;
2987
                return(-2);
2988
            }
2989
            c = c & 0x3F;
2990
            d = d & 0x1F;
2991
            d = xlattable [48 + c + xlattable [d] * 64];
2992
            if (d == 0) {
2993
                /* not in character set */
2994
                *outlen = out - outstart;
2995
                *inlen = processed - instart;
2996
                return(-2);
2997
            }
2998
            *out++ = d;
2999
        } else if (d < 0xF0) {
3000
            unsigned char c1;
3001
            unsigned char c2;
3002
            if (!(in < inend - 1)) {
3003
                /* trailing bytes not in input buffer */
3004
                *outlen = out - outstart;
3005
                *inlen = processed - instart;
3006
                return(-3);
3007
            }
3008
            c1 = *in++;
3009
            if ((c1 & 0xC0) != 0x80) {
3010
                /* not a trailing byte (c1) */
3011
                *outlen = out - outstart;
3012
                *inlen = processed - instart;
3013
                return(-2);
3014
            }
3015
            c2 = *in++;
3016
            if ((c2 & 0xC0) != 0x80) {
3017
                /* not a trailing byte (c2) */
3018
                *outlen = out - outstart;
3019
                *inlen = processed - instart;
3020
                return(-2);
3021
            }
3022
            c1 = c1 & 0x3F;
3023
            c2 = c2 & 0x3F;
3024
      d = d & 0x0F;
3025
      d = xlattable [48 + c2 + xlattable [48 + c1 +
3026
      xlattable [32 + d] * 64] * 64];
3027
            if (d == 0) {
3028
                /* not in character set */
3029
                *outlen = out - outstart;
3030
                *inlen = processed - instart;
3031
                return(-2);
3032
            }
3033
            *out++ = d;
3034
        } else {
3035
            /* cannot transcode >= U+010000 */
3036
            *outlen = out - outstart;
3037
            *inlen = processed - instart;
3038
            return(-2);
3039
        }
3040
        processed = in;
3041
    }
3042
    *outlen = out - outstart;
3043
    *inlen = processed - instart;
3044
    return(*outlen);
3045
}
3046
3047
/**
3048
 * ISO8859xToUTF8
3049
 * @out:  a pointer to an array of bytes to store the result
3050
 * @outlen:  the length of @out
3051
 * @in:  a pointer to an array of ISO Latin 1 chars
3052
 * @inlen:  the length of @in
3053
 *
3054
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
3055
 * block of chars out.
3056
 * Returns 0 if success, or -1 otherwise
3057
 * The value of @inlen after return is the number of octets consumed
3058
 * The value of @outlen after return is the number of octets produced.
3059
 */
3060
static int
3061
ISO8859xToUTF8(unsigned char* out, int *outlen,
3062
              const unsigned char* in, int *inlen,
3063
              unsigned short const *unicodetable) {
3064
    unsigned char* outstart = out;
3065
    unsigned char* outend;
3066
    const unsigned char* instart = in;
3067
    const unsigned char* inend;
3068
    const unsigned char* instop;
3069
    unsigned int c;
3070
3071
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
3072
        (in == NULL) || (unicodetable == NULL))
3073
  return(-1);
3074
    outend = out + *outlen;
3075
    inend = in + *inlen;
3076
    instop = inend;
3077
3078
    while ((in < inend) && (out < outend - 2)) {
3079
        if (*in >= 0x80) {
3080
            c = unicodetable [*in - 0x80];
3081
            if (c == 0) {
3082
                /* undefined code point */
3083
                *outlen = out - outstart;
3084
                *inlen = in - instart;
3085
                return (-1);
3086
            }
3087
            if (c < 0x800) {
3088
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
3089
                *out++ = (c & 0x3F) | 0x80;
3090
            } else {
3091
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
3092
                *out++ = ((c >>  6) & 0x3F) | 0x80;
3093
                *out++ = (c & 0x3F) | 0x80;
3094
            }
3095
            ++in;
3096
        }
3097
        if (instop - in > outend - out) instop = in + (outend - out);
3098
        while ((*in < 0x80) && (in < instop)) {
3099
            *out++ = *in++;
3100
        }
3101
    }
3102
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
3103
        *out++ =  *in++;
3104
    }
3105
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
3106
        *out++ =  *in++;
3107
    }
3108
    *outlen = out - outstart;
3109
    *inlen = in - instart;
3110
    return (*outlen);
3111
}
3112
3113
3114
/************************************************************************
3115
 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
3116
 ************************************************************************/
3117
3118
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
3119
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3120
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3121
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3122
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3123
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
3124
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
3125
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
3126
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
3127
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
3128
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
3129
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
3130
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
3131
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
3132
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
3133
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
3134
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
3135
};
3136
3137
static const unsigned char xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
3138
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3139
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3140
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3141
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3142
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3143
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3144
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3145
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3146
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3147
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3148
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3149
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
3150
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
3151
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3152
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
3153
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3154
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
3155
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3156
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3157
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
3158
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
3159
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
3160
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
3161
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3162
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
3163
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
3164
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
3165
};
3166
3167
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
3168
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3169
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3170
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3171
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3172
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
3173
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
3174
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
3175
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
3176
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
3177
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3178
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
3179
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
3180
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
3181
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3182
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
3183
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
3184
};
3185
3186
static const unsigned char xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
3187
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3188
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3189
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3190
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3191
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3192
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3193
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3194
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3195
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3196
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3197
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
3198
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
3199
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
3200
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
3201
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3202
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3203
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
3204
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3205
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3206
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3207
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3208
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3209
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3210
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3211
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
3212
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
3213
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
3214
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3215
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3216
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3217
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
3218
};
3219
3220
static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
3221
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3222
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3223
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3224
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3225
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
3226
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
3227
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
3228
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
3229
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3230
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
3231
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3232
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
3233
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3234
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
3235
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3236
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
3237
};
3238
3239
static const unsigned char xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
3240
    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
3241
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3242
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3243
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3244
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3245
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3246
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3247
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3248
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3249
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
3250
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3251
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3252
    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3253
    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
3254
    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
3255
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
3256
    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
3257
    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
3258
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
3259
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3260
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
3261
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3262
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3263
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3264
    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
3265
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
3266
    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
3267
};
3268
3269
static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
3270
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3271
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3272
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3273
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3274
    0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
3275
    0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
3276
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
3277
    0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
3278
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
3279
    0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
3280
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
3281
    0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
3282
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
3283
    0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
3284
    0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
3285
    0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
3286
};
3287
3288
static const unsigned char xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
3289
    "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3290
    "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3291
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3292
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3293
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3294
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3295
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3296
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3297
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3298
    "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
3299
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3300
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
3301
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3302
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3303
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3304
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3305
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
3306
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3307
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3308
    "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3309
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3310
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3311
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3312
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3313
    "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3314
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3315
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3316
};
3317
3318
static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
3319
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3320
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3321
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3322
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3323
    0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
3324
    0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
3325
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3326
    0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
3327
    0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
3328
    0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3329
    0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3330
    0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3331
    0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3332
    0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3333
    0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3334
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3335
};
3336
3337
static const unsigned char xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
3338
    "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3339
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3340
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3341
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3342
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3343
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3344
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3345
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3346
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3347
    "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3348
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3349
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3350
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3351
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3352
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3353
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3354
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3355
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3356
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3357
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3358
    "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3359
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3360
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3361
};
3362
3363
static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
3364
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3365
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3366
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3367
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3368
    0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3369
    0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3370
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3371
    0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3372
    0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3373
    0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3374
    0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3375
    0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3376
    0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3377
    0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3378
    0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3379
    0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3380
};
3381
3382
static const unsigned char xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3383
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3384
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3385
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3386
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3387
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3388
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3389
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3390
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3391
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3392
    "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3393
    "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3394
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3395
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3396
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3397
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3398
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3399
    "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3400
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3401
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3402
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3403
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3404
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3405
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3406
    "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3407
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3408
    "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3409
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3410
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3411
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3412
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3413
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3414
};
3415
3416
static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
3417
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3418
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3419
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3420
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3421
    0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3422
    0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3423
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3424
    0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3425
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3426
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3427
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3428
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3429
    0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3430
    0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3431
    0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3432
    0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3433
};
3434
3435
static const unsigned char xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3436
    "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3437
    "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3438
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3439
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3440
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3441
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3442
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3443
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3444
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3445
    "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3446
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3447
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3448
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3449
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3450
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3451
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3452
    "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3453
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3454
    "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3455
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3456
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3457
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3458
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3459
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3460
    "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3461
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3462
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3463
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3464
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3465
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3466
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3467
};
3468
3469
static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
3470
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3471
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3472
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3473
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3474
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3475
    0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3476
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3477
    0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3478
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3479
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3480
    0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3481
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3482
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3483
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3484
    0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3485
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3486
};
3487
3488
static const unsigned char xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3489
    "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3490
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3491
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3492
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3493
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3494
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3495
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3496
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3497
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3498
    "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3499
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3500
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3501
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3502
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3503
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3504
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3505
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3506
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3507
    "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3508
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3509
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3510
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3511
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3512
};
3513
3514
static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3515
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3516
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3517
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3518
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3519
    0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3520
    0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3521
    0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3522
    0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3523
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3524
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3525
    0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3526
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3527
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3528
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3529
    0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3530
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3531
};
3532
3533
static const unsigned char xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3534
    "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3535
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3536
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3537
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3538
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3539
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3540
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3541
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3542
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3543
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3544
    "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3545
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3546
    "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3547
    "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3548
    "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3549
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3550
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3551
    "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3552
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3553
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3554
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3555
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3556
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3557
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3558
    "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3559
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3560
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3561
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3562
    "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3563
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3564
    "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3565
};
3566
3567
static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3568
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3569
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3570
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3571
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3572
    0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3573
    0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3574
    0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3575
    0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3576
    0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3577
    0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3578
    0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3579
    0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3580
    0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3581
    0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3582
    0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3583
    0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3584
};
3585
3586
static const unsigned char xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3587
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3588
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3589
    "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3590
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3591
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3592
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3593
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3594
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3595
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3596
    "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3597
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3598
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3599
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3600
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3601
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3602
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3603
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3604
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3605
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3606
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3607
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3608
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3609
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3610
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3611
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3612
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3613
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3614
};
3615
3616
static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3617
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3618
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3619
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3620
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3621
    0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3622
    0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3623
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3624
    0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3625
    0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3626
    0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3627
    0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3628
    0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3629
    0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3630
    0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3631
    0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3632
    0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3633
};
3634
3635
static const unsigned char xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3636
    "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3637
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3638
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3639
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3640
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3641
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3642
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3643
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3644
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3645
    "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3646
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3647
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3648
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3649
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3650
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3651
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3652
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3653
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3654
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3655
    "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3656
    "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3657
    "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3658
    "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3659
    "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3660
    "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3661
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3662
    "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3663
    "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3664
    "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3665
    "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3666
    "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3667
};
3668
3669
static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
3670
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3671
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3672
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3673
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3674
    0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3675
    0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3676
    0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3677
    0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3678
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3679
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3680
    0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3681
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3682
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3683
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3684
    0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3685
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
3686
};
3687
3688
static const unsigned char xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
3689
    "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3690
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3691
    "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3692
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3693
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3694
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3695
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3696
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3697
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3698
    "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3699
    "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3700
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3701
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3702
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3703
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3704
    "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3705
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3706
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3707
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3708
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3709
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3710
    "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3711
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3712
    "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3713
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3714
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3715
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3716
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3717
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3718
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3719
    "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3720
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3721
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3722
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3723
    "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3724
    "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3725
    "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3726
    "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3727
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3728
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3729
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3730
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3731
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3732
};
3733
3734
static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
3735
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3736
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3737
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3738
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3739
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3740
    0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3741
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3742
    0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3743
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3744
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3745
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3746
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3747
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3748
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3749
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3750
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
3751
};
3752
3753
static const unsigned char xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
3754
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3755
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3756
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3757
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3758
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3759
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3760
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3761
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3762
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3763
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3764
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3765
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3766
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3767
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3768
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3769
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3770
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3771
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3772
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3773
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3774
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3775
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3776
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3777
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3778
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3779
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3780
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
3781
};
3782
3783
static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
3784
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3785
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3786
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3787
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3788
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3789
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3790
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3791
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3792
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3793
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3794
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3795
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3796
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3797
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3798
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3799
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
3800
};
3801
3802
static const unsigned char xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
3803
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3804
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3805
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3806
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3807
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3808
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3809
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3810
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3811
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3812
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3813
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3814
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3815
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3816
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3817
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3818
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3819
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3820
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3821
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3822
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3823
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3824
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3825
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3826
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3827
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3828
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3829
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3830
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3831
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3832
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3833
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3834
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3835
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3836
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3837
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3838
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3839
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3840
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3841
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3842
};
3843
3844
3845
/*
3846
 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3847
 */
3848
3849
static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3850
    const unsigned char* in, int *inlen) {
3851
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3852
}
3853
static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3854
    const unsigned char* in, int *inlen) {
3855
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3856
}
3857
3858
static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3859
    const unsigned char* in, int *inlen) {
3860
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3861
}
3862
static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3863
    const unsigned char* in, int *inlen) {
3864
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3865
}
3866
3867
static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3868
    const unsigned char* in, int *inlen) {
3869
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3870
}
3871
static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3872
    const unsigned char* in, int *inlen) {
3873
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3874
}
3875
3876
static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3877
    const unsigned char* in, int *inlen) {
3878
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3879
}
3880
static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3881
    const unsigned char* in, int *inlen) {
3882
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3883
}
3884
3885
static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3886
    const unsigned char* in, int *inlen) {
3887
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3888
}
3889
static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3890
    const unsigned char* in, int *inlen) {
3891
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3892
}
3893
3894
static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3895
    const unsigned char* in, int *inlen) {
3896
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3897
}
3898
static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3899
    const unsigned char* in, int *inlen) {
3900
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3901
}
3902
3903
static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3904
    const unsigned char* in, int *inlen) {
3905
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3906
}
3907
static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3908
    const unsigned char* in, int *inlen) {
3909
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3910
}
3911
3912
static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3913
    const unsigned char* in, int *inlen) {
3914
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3915
}
3916
static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3917
    const unsigned char* in, int *inlen) {
3918
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3919
}
3920
3921
static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3922
    const unsigned char* in, int *inlen) {
3923
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3924
}
3925
static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3926
    const unsigned char* in, int *inlen) {
3927
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3928
}
3929
3930
static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3931
    const unsigned char* in, int *inlen) {
3932
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3933
}
3934
static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3935
    const unsigned char* in, int *inlen) {
3936
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3937
}
3938
3939
static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3940
    const unsigned char* in, int *inlen) {
3941
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3942
}
3943
static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3944
    const unsigned char* in, int *inlen) {
3945
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3946
}
3947
3948
static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3949
    const unsigned char* in, int *inlen) {
3950
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3951
}
3952
static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3953
    const unsigned char* in, int *inlen) {
3954
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3955
}
3956
3957
static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3958
    const unsigned char* in, int *inlen) {
3959
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3960
}
3961
static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3962
    const unsigned char* in, int *inlen) {
3963
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3964
}
3965
3966
static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3967
    const unsigned char* in, int *inlen) {
3968
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3969
}
3970
static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3971
    const unsigned char* in, int *inlen) {
3972
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3973
}
3974
3975
static void
3976
xmlRegisterCharEncodingHandlersISO8859x (void) {
3977
    xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3978
    xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3979
    xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3980
    xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3981
    xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3982
    xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3983
    xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3984
    xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3985
    xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3986
    xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3987
    xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3988
    xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3989
    xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3990
    xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3991
}
3992
3993
#endif
3994
#endif
3995