Coverage Report

Created: 2023-05-11 17:20

/src/libxml2/encoding.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * encoding.c : implements the encoding conversion functions needed for XML
3
 *
4
 * Related specs:
5
 * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6
 * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7
 * [ISO-10646]    UTF-8 and UTF-16 in Annexes
8
 * [ISO-8859-1]   ISO Latin-1 characters codes.
9
 * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
10
 *                Worldwide Character Encoding -- Version 1.0", Addison-
11
 *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
12
 *                described in Unicode Technical Report #4.
13
 * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
14
 *                Information Interchange, ANSI X3.4-1986.
15
 *
16
 * See Copyright for the status of this software.
17
 *
18
 * daniel@veillard.com
19
 *
20
 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
21
 */
22
23
#define IN_LIBXML
24
#include "libxml.h"
25
26
#include <string.h>
27
#include <limits.h>
28
#include <ctype.h>
29
#include <stdlib.h>
30
31
#ifdef LIBXML_ICONV_ENABLED
32
#include <errno.h>
33
#endif
34
35
#include <libxml/encoding.h>
36
#include <libxml/xmlmemory.h>
37
#ifdef LIBXML_HTML_ENABLED
38
#include <libxml/HTMLparser.h>
39
#endif
40
#include <libxml/globals.h>
41
#include <libxml/xmlerror.h>
42
43
#include "private/buf.h"
44
#include "private/enc.h"
45
#include "private/error.h"
46
47
#ifdef LIBXML_ICU_ENABLED
48
#include <unicode/ucnv.h>
49
/* Size of pivot buffer, same as icu/source/common/ucnv.cpp CHUNK_SIZE */
50
#define ICU_PIVOT_BUF_SIZE 1024
51
typedef struct _uconv_t uconv_t;
52
struct _uconv_t {
53
  UConverter *uconv; /* for conversion between an encoding and UTF-16 */
54
  UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */
55
  UChar      pivot_buf[ICU_PIVOT_BUF_SIZE];
56
  UChar      *pivot_source;
57
  UChar      *pivot_target;
58
};
59
#endif
60
61
static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
62
static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
63
64
typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
65
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
66
struct _xmlCharEncodingAlias {
67
    const char *name;
68
    const char *alias;
69
};
70
71
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
72
static int xmlCharEncodingAliasesNb = 0;
73
static int xmlCharEncodingAliasesMax = 0;
74
75
#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
76
#if 0
77
#define DEBUG_ENCODING  /* Define this to get encoding traces */
78
#endif
79
#else
80
#ifdef LIBXML_ISO8859X_ENABLED
81
static void xmlRegisterCharEncodingHandlersISO8859x (void);
82
#endif
83
#endif
84
85
static int xmlLittleEndian = 1;
86
87
/**
88
 * xmlEncodingErrMemory:
89
 * @extra:  extra information
90
 *
91
 * Handle an out of memory condition
92
 */
93
static void
94
xmlEncodingErrMemory(const char *extra)
95
0
{
96
0
    __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
97
0
}
98
99
/**
100
 * xmlErrEncoding:
101
 * @error:  the error number
102
 * @msg:  the error message
103
 *
104
 * n encoding error
105
 */
106
static void LIBXML_ATTR_FORMAT(2,0)
107
xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
108
32.6k
{
109
32.6k
    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
110
32.6k
                    XML_FROM_I18N, error, XML_ERR_FATAL,
111
32.6k
                    NULL, 0, val, NULL, NULL, 0, 0, msg, val);
112
32.6k
}
113
114
#ifdef LIBXML_ICU_ENABLED
115
static uconv_t*
116
openIcuConverter(const char* name, int toUnicode)
117
{
118
  UErrorCode status = U_ZERO_ERROR;
119
  uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
120
  if (conv == NULL)
121
    return NULL;
122
123
  conv->pivot_source = conv->pivot_buf;
124
  conv->pivot_target = conv->pivot_buf;
125
126
  conv->uconv = ucnv_open(name, &status);
127
  if (U_FAILURE(status))
128
    goto error;
129
130
  status = U_ZERO_ERROR;
131
  if (toUnicode) {
132
    ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
133
                        NULL, NULL, NULL, &status);
134
  }
135
  else {
136
    ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
137
                        NULL, NULL, NULL, &status);
138
  }
139
  if (U_FAILURE(status))
140
    goto error;
141
142
  status = U_ZERO_ERROR;
143
  conv->utf8 = ucnv_open("UTF-8", &status);
144
  if (U_SUCCESS(status))
145
    return conv;
146
147
error:
148
  if (conv->uconv)
149
    ucnv_close(conv->uconv);
150
  xmlFree(conv);
151
  return NULL;
152
}
153
154
static void
155
closeIcuConverter(uconv_t *conv)
156
{
157
  if (conv != NULL) {
158
    ucnv_close(conv->uconv);
159
    ucnv_close(conv->utf8);
160
    xmlFree(conv);
161
  }
162
}
163
#endif /* LIBXML_ICU_ENABLED */
164
165
/************************************************************************
166
 *                  *
167
 *    Conversions To/From UTF8 encoding     *
168
 *                  *
169
 ************************************************************************/
170
171
/**
172
 * asciiToUTF8:
173
 * @out:  a pointer to an array of bytes to store the result
174
 * @outlen:  the length of @out
175
 * @in:  a pointer to an array of ASCII chars
176
 * @inlen:  the length of @in
177
 *
178
 * Take a block of ASCII chars in and try to convert it to an UTF-8
179
 * block of chars out.
180
 * Returns 0 if success, or -1 otherwise
181
 * The value of @inlen after return is the number of octets consumed
182
 *     if the return value is positive, else unpredictable.
183
 * The value of @outlen after return is the number of octets produced.
184
 */
185
static int
186
asciiToUTF8(unsigned char* out, int *outlen,
187
318k
              const unsigned char* in, int *inlen) {
188
318k
    unsigned char* outstart = out;
189
318k
    const unsigned char* base = in;
190
318k
    const unsigned char* processed = in;
191
318k
    unsigned char* outend = out + *outlen;
192
318k
    const unsigned char* inend;
193
318k
    unsigned int c;
194
195
318k
    inend = in + (*inlen);
196
8.49M
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
197
8.46M
  c= *in++;
198
199
8.46M
        if (out >= outend)
200
0
      break;
201
8.46M
        if (c < 0x80) {
202
8.17M
      *out++ = c;
203
8.17M
  } else {
204
291k
      *outlen = out - outstart;
205
291k
      *inlen = processed - base;
206
291k
      return(-1);
207
291k
  }
208
209
8.17M
  processed = (const unsigned char*) in;
210
8.17M
    }
211
26.5k
    *outlen = out - outstart;
212
26.5k
    *inlen = processed - base;
213
26.5k
    return(*outlen);
214
318k
}
215
216
#ifdef LIBXML_OUTPUT_ENABLED
217
/**
218
 * UTF8Toascii:
219
 * @out:  a pointer to an array of bytes to store the result
220
 * @outlen:  the length of @out
221
 * @in:  a pointer to an array of UTF-8 chars
222
 * @inlen:  the length of @in
223
 *
224
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
225
 * block of chars out.
226
 *
227
 * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
228
 * The value of @inlen after return is the number of octets consumed
229
 *     if the return value is positive, else unpredictable.
230
 * The value of @outlen after return is the number of octets produced.
231
 */
232
static int
233
UTF8Toascii(unsigned char* out, int *outlen,
234
27.6k
              const unsigned char* in, int *inlen) {
235
27.6k
    const unsigned char* processed = in;
236
27.6k
    const unsigned char* outend;
237
27.6k
    const unsigned char* outstart = out;
238
27.6k
    const unsigned char* instart = in;
239
27.6k
    const unsigned char* inend;
240
27.6k
    unsigned int c, d;
241
27.6k
    int trailing;
242
243
27.6k
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
244
27.6k
    if (in == NULL) {
245
        /*
246
   * initialization nothing to do
247
   */
248
1.47k
  *outlen = 0;
249
1.47k
  *inlen = 0;
250
1.47k
  return(0);
251
1.47k
    }
252
26.1k
    inend = in + (*inlen);
253
26.1k
    outend = out + (*outlen);
254
3.47M
    while (in < inend) {
255
3.46M
  d = *in++;
256
3.46M
  if      (d < 0x80)  { c= d; trailing= 0; }
257
12.0k
  else if (d < 0xC0) {
258
      /* trailing byte in leading position */
259
0
      *outlen = out - outstart;
260
0
      *inlen = processed - instart;
261
0
      return(-2);
262
12.0k
        } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
263
715
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
264
270
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
265
0
  else {
266
      /* no chance for this in Ascii */
267
0
      *outlen = out - outstart;
268
0
      *inlen = processed - instart;
269
0
      return(-2);
270
0
  }
271
272
3.46M
  if (inend - in < trailing) {
273
3
      break;
274
3
  }
275
276
3.47M
  for ( ; trailing; trailing--) {
277
13.0k
      if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
278
0
    break;
279
13.0k
      c <<= 6;
280
13.0k
      c |= d & 0x3F;
281
13.0k
  }
282
283
  /* assertion: c is a single UTF-4 value */
284
3.46M
  if (c < 0x80) {
285
3.45M
      if (out >= outend)
286
0
    break;
287
3.45M
      *out++ = c;
288
3.45M
  } else {
289
      /* no chance for this in Ascii */
290
12.0k
      *outlen = out - outstart;
291
12.0k
      *inlen = processed - instart;
292
12.0k
      return(-2);
293
12.0k
  }
294
3.45M
  processed = in;
295
3.45M
    }
296
14.1k
    *outlen = out - outstart;
297
14.1k
    *inlen = processed - instart;
298
14.1k
    return(*outlen);
299
26.1k
}
300
#endif /* LIBXML_OUTPUT_ENABLED */
301
302
/**
303
 * isolat1ToUTF8:
304
 * @out:  a pointer to an array of bytes to store the result
305
 * @outlen:  the length of @out
306
 * @in:  a pointer to an array of ISO Latin 1 chars
307
 * @inlen:  the length of @in
308
 *
309
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
310
 * block of chars out.
311
 * Returns the number of bytes written if success, or -1 otherwise
312
 * The value of @inlen after return is the number of octets consumed
313
 *     if the return value is positive, else unpredictable.
314
 * The value of @outlen after return is the number of octets produced.
315
 */
316
int
317
isolat1ToUTF8(unsigned char* out, int *outlen,
318
1.11M
              const unsigned char* in, int *inlen) {
319
1.11M
    unsigned char* outstart = out;
320
1.11M
    const unsigned char* base = in;
321
1.11M
    unsigned char* outend;
322
1.11M
    const unsigned char* inend;
323
1.11M
    const unsigned char* instop;
324
325
1.11M
    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
326
0
  return(-1);
327
328
1.11M
    outend = out + *outlen;
329
1.11M
    inend = in + (*inlen);
330
1.11M
    instop = inend;
331
332
27.4M
    while ((in < inend) && (out < outend - 1)) {
333
26.2M
  if (*in >= 0x80) {
334
25.2M
      *out++ = (((*in) >>  6) & 0x1F) | 0xC0;
335
25.2M
            *out++ = ((*in) & 0x3F) | 0x80;
336
25.2M
      ++in;
337
25.2M
  }
338
26.2M
  if ((instop - in) > (outend - out)) instop = in + (outend - out);
339
416M
  while ((in < instop) && (*in < 0x80)) {
340
390M
      *out++ = *in++;
341
390M
  }
342
26.2M
    }
343
1.11M
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
344
0
        *out++ = *in++;
345
0
    }
346
1.11M
    *outlen = out - outstart;
347
1.11M
    *inlen = in - base;
348
1.11M
    return(*outlen);
349
1.11M
}
350
351
/**
352
 * UTF8ToUTF8:
353
 * @out:  a pointer to an array of bytes to store the result
354
 * @outlen:  the length of @out
355
 * @inb:  a pointer to an array of UTF-8 chars
356
 * @inlenb:  the length of @in in UTF-8 chars
357
 *
358
 * No op copy operation for UTF8 handling.
359
 *
360
 * Returns the number of bytes written, or -1 if lack of space.
361
 *     The value of *inlen after return is the number of octets consumed
362
 *     if the return value is positive, else unpredictable.
363
 */
364
static int
365
UTF8ToUTF8(unsigned char* out, int *outlen,
366
           const unsigned char* inb, int *inlenb)
367
383k
{
368
383k
    int len;
369
370
383k
    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
371
0
  return(-1);
372
383k
    if (inb == NULL) {
373
        /* inb == NULL means output is initialized. */
374
9.45k
        *outlen = 0;
375
9.45k
        *inlenb = 0;
376
9.45k
        return(0);
377
9.45k
    }
378
373k
    if (*outlen > *inlenb) {
379
373k
  len = *inlenb;
380
373k
    } else {
381
0
  len = *outlen;
382
0
    }
383
373k
    if (len < 0)
384
0
  return(-1);
385
386
    /*
387
     * FIXME: Conversion functions must assure valid UTF-8, so we have
388
     * to check for UTF-8 validity. Preferably, this converter shouldn't
389
     * be used at all.
390
     */
391
373k
    memcpy(out, inb, len);
392
393
373k
    *outlen = len;
394
373k
    *inlenb = len;
395
373k
    return(*outlen);
396
373k
}
397
398
399
#ifdef LIBXML_OUTPUT_ENABLED
400
/**
401
 * UTF8Toisolat1:
402
 * @out:  a pointer to an array of bytes to store the result
403
 * @outlen:  the length of @out
404
 * @in:  a pointer to an array of UTF-8 chars
405
 * @inlen:  the length of @in
406
 *
407
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
408
 * block of chars out.
409
 *
410
 * Returns the number of bytes written if success, -2 if the transcoding fails,
411
           or -1 otherwise
412
 * The value of @inlen after return is the number of octets consumed
413
 *     if the return value is positive, else unpredictable.
414
 * The value of @outlen after return is the number of octets produced.
415
 */
416
int
417
UTF8Toisolat1(unsigned char* out, int *outlen,
418
405k
              const unsigned char* in, int *inlen) {
419
405k
    const unsigned char* processed = in;
420
405k
    const unsigned char* outend;
421
405k
    const unsigned char* outstart = out;
422
405k
    const unsigned char* instart = in;
423
405k
    const unsigned char* inend;
424
405k
    unsigned int c, d;
425
405k
    int trailing;
426
427
405k
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
428
405k
    if (in == NULL) {
429
        /*
430
   * initialization nothing to do
431
   */
432
4.11k
  *outlen = 0;
433
4.11k
  *inlen = 0;
434
4.11k
  return(0);
435
4.11k
    }
436
400k
    inend = in + (*inlen);
437
400k
    outend = out + (*outlen);
438
863M
    while (in < inend) {
439
862M
  d = *in++;
440
862M
  if      (d < 0x80)  { c= d; trailing= 0; }
441
80.5M
  else if (d < 0xC0) {
442
      /* trailing byte in leading position */
443
625
      *outlen = out - outstart;
444
625
      *inlen = processed - instart;
445
625
      return(-2);
446
80.5M
        } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
447
54.9k
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
448
44.5k
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
449
4
  else {
450
      /* no chance for this in IsoLat1 */
451
4
      *outlen = out - outstart;
452
4
      *inlen = processed - instart;
453
4
      return(-2);
454
4
  }
455
456
862M
  if (inend - in < trailing) {
457
13.4k
      break;
458
13.4k
  }
459
460
943M
  for ( ; trailing; trailing--) {
461
80.6M
      if (in >= inend)
462
0
    break;
463
80.6M
      if (((d= *in++) & 0xC0) != 0x80) {
464
26
    *outlen = out - outstart;
465
26
    *inlen = processed - instart;
466
26
    return(-2);
467
26
      }
468
80.6M
      c <<= 6;
469
80.6M
      c |= d & 0x3F;
470
80.6M
  }
471
472
  /* assertion: c is a single UTF-4 value */
473
862M
  if (c <= 0xFF) {
474
862M
      if (out >= outend)
475
0
    break;
476
862M
      *out++ = c;
477
862M
  } else {
478
      /* no chance for this in IsoLat1 */
479
84.2k
      *outlen = out - outstart;
480
84.2k
      *inlen = processed - instart;
481
84.2k
      return(-2);
482
84.2k
  }
483
862M
  processed = in;
484
862M
    }
485
316k
    *outlen = out - outstart;
486
316k
    *inlen = processed - instart;
487
316k
    return(*outlen);
488
400k
}
489
#endif /* LIBXML_OUTPUT_ENABLED */
490
491
/**
492
 * UTF16LEToUTF8:
493
 * @out:  a pointer to an array of bytes to store the result
494
 * @outlen:  the length of @out
495
 * @inb:  a pointer to an array of UTF-16LE passwd as a byte array
496
 * @inlenb:  the length of @in in UTF-16LE chars
497
 *
498
 * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8
499
 * block of chars out. This function assumes the endian property
500
 * is the same between the native type of this machine and the
501
 * inputed one.
502
 *
503
 * Returns the number of bytes written, or -1 if lack of space, or -2
504
 *     if the transcoding fails (if *in is not a valid utf16 string)
505
 *     The value of *inlen after return is the number of octets consumed
506
 *     if the return value is positive, else unpredictable.
507
 */
508
static int
509
UTF16LEToUTF8(unsigned char* out, int *outlen,
510
            const unsigned char* inb, int *inlenb)
511
117k
{
512
117k
    unsigned char* outstart = out;
513
117k
    const unsigned char* processed = inb;
514
117k
    unsigned char* outend;
515
117k
    unsigned short* in = (unsigned short*) inb;
516
117k
    unsigned short* inend;
517
117k
    unsigned int c, d, inlen;
518
117k
    unsigned char *tmp;
519
117k
    int bits;
520
521
117k
    if (*outlen == 0) {
522
0
        *inlenb = 0;
523
0
        return(0);
524
0
    }
525
117k
    outend = out + *outlen;
526
117k
    if ((*inlenb % 2) == 1)
527
44.5k
        (*inlenb)--;
528
117k
    inlen = *inlenb / 2;
529
117k
    inend = in + inlen;
530
7.30M
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
531
7.19M
        if (xmlLittleEndian) {
532
7.19M
      c= *in++;
533
7.19M
  } else {
534
0
      tmp = (unsigned char *) in;
535
0
      c = *tmp++;
536
0
      c = c | (*tmp << 8);
537
0
      in++;
538
0
  }
539
7.19M
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
540
4.87k
      if (in >= inend) {           /* handle split mutli-byte characters */
541
1.64k
    break;
542
1.64k
      }
543
3.22k
      if (xmlLittleEndian) {
544
3.22k
    d = *in++;
545
3.22k
      } else {
546
0
    tmp = (unsigned char *) in;
547
0
    d = *tmp++;
548
0
    d = d | (*tmp << 8);
549
0
    in++;
550
0
      }
551
3.22k
            if ((d & 0xFC00) == 0xDC00) {
552
1.02k
                c &= 0x03FF;
553
1.02k
                c <<= 10;
554
1.02k
                c |= d & 0x03FF;
555
1.02k
                c += 0x10000;
556
1.02k
            }
557
2.20k
            else {
558
2.20k
    *outlen = out - outstart;
559
2.20k
    *inlenb = processed - inb;
560
2.20k
          return(-2);
561
2.20k
      }
562
3.22k
        }
563
564
  /* assertion: c is a single UTF-4 value */
565
7.18M
        if (out >= outend)
566
0
      break;
567
7.18M
        if      (c <    0x80) {  *out++=  c;                bits= -6; }
568
7.15M
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
569
7.05M
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
570
1.02k
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
571
572
21.3M
        for ( ; bits >= 0; bits-= 6) {
573
14.2M
            if (out >= outend)
574
0
          break;
575
14.2M
            *out++= ((c >> bits) & 0x3F) | 0x80;
576
14.2M
        }
577
7.18M
  processed = (const unsigned char*) in;
578
7.18M
    }
579
115k
    *outlen = out - outstart;
580
115k
    *inlenb = processed - inb;
581
115k
    return(*outlen);
582
117k
}
583
584
#ifdef LIBXML_OUTPUT_ENABLED
585
/**
586
 * UTF8ToUTF16LE:
587
 * @outb:  a pointer to an array of bytes to store the result
588
 * @outlen:  the length of @outb
589
 * @in:  a pointer to an array of UTF-8 chars
590
 * @inlen:  the length of @in
591
 *
592
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
593
 * block of chars out.
594
 *
595
 * Returns the number of bytes written, or -1 if lack of space, or -2
596
 *     if the transcoding failed.
597
 */
598
static int
599
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
600
            const unsigned char* in, int *inlen)
601
872k
{
602
872k
    unsigned short* out = (unsigned short*) outb;
603
872k
    const unsigned char* processed = in;
604
872k
    const unsigned char *const instart = in;
605
872k
    unsigned short* outstart= out;
606
872k
    unsigned short* outend;
607
872k
    const unsigned char* inend;
608
872k
    unsigned int c, d;
609
872k
    int trailing;
610
872k
    unsigned char *tmp;
611
872k
    unsigned short tmp1, tmp2;
612
613
    /* UTF16LE encoding has no BOM */
614
872k
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
615
872k
    if (in == NULL) {
616
130
  *outlen = 0;
617
130
  *inlen = 0;
618
130
  return(0);
619
130
    }
620
872k
    inend= in + *inlen;
621
872k
    outend = out + (*outlen / 2);
622
4.92M
    while (in < inend) {
623
4.50M
      d= *in++;
624
4.50M
      if      (d < 0x80)  { c= d; trailing= 0; }
625
1.07M
      else if (d < 0xC0) {
626
          /* trailing byte in leading position */
627
420k
    *outlen = (out - outstart) * 2;
628
420k
    *inlen = processed - instart;
629
420k
    return(-2);
630
656k
      } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
631
440k
      else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
632
297k
      else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
633
360
      else {
634
  /* no chance for this in UTF-16 */
635
360
  *outlen = (out - outstart) * 2;
636
360
  *inlen = processed - instart;
637
360
  return(-2);
638
360
      }
639
640
4.08M
      if (inend - in < trailing) {
641
230
          break;
642
230
      }
643
644
4.22M
      for ( ; trailing; trailing--) {
645
735k
          if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
646
596k
        break;
647
138k
          c <<= 6;
648
138k
          c |= d & 0x3F;
649
138k
      }
650
651
      /* assertion: c is a single UTF-4 value */
652
4.08M
        if (c < 0x10000) {
653
4.05M
            if (out >= outend)
654
0
          break;
655
4.05M
      if (xmlLittleEndian) {
656
4.05M
    *out++ = c;
657
4.05M
      } else {
658
0
    tmp = (unsigned char *) out;
659
0
    *tmp = (unsigned char) c; /* Explicit truncation */
660
0
    *(tmp + 1) = c >> 8 ;
661
0
    out++;
662
0
      }
663
4.05M
        }
664
34.0k
        else if (c < 0x110000) {
665
3.50k
            if (out+1 >= outend)
666
0
          break;
667
3.50k
            c -= 0x10000;
668
3.50k
      if (xmlLittleEndian) {
669
3.50k
    *out++ = 0xD800 | (c >> 10);
670
3.50k
    *out++ = 0xDC00 | (c & 0x03FF);
671
3.50k
      } else {
672
0
    tmp1 = 0xD800 | (c >> 10);
673
0
    tmp = (unsigned char *) out;
674
0
    *tmp = (unsigned char) tmp1; /* Explicit truncation */
675
0
    *(tmp + 1) = tmp1 >> 8;
676
0
    out++;
677
678
0
    tmp2 = 0xDC00 | (c & 0x03FF);
679
0
    tmp = (unsigned char *) out;
680
0
    *tmp  = (unsigned char) tmp2; /* Explicit truncation */
681
0
    *(tmp + 1) = tmp2 >> 8;
682
0
    out++;
683
0
      }
684
3.50k
        }
685
30.5k
        else
686
30.5k
      break;
687
4.05M
  processed = in;
688
4.05M
    }
689
451k
    *outlen = (out - outstart) * 2;
690
451k
    *inlen = processed - instart;
691
451k
    return(*outlen);
692
872k
}
693
694
/**
695
 * UTF8ToUTF16:
696
 * @outb:  a pointer to an array of bytes to store the result
697
 * @outlen:  the length of @outb
698
 * @in:  a pointer to an array of UTF-8 chars
699
 * @inlen:  the length of @in
700
 *
701
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
702
 * block of chars out.
703
 *
704
 * Returns the number of bytes written, or -1 if lack of space, or -2
705
 *     if the transcoding failed.
706
 */
707
static int
708
UTF8ToUTF16(unsigned char* outb, int *outlen,
709
            const unsigned char* in, int *inlen)
710
873k
{
711
873k
    if (in == NULL) {
712
  /*
713
   * initialization, add the Byte Order Mark for UTF-16LE
714
   */
715
1.29k
        if (*outlen >= 2) {
716
1.29k
      outb[0] = 0xFF;
717
1.29k
      outb[1] = 0xFE;
718
1.29k
      *outlen = 2;
719
1.29k
      *inlen = 0;
720
#ifdef DEBUG_ENCODING
721
            xmlGenericError(xmlGenericErrorContext,
722
        "Added FFFE Byte Order Mark\n");
723
#endif
724
1.29k
      return(2);
725
1.29k
  }
726
0
  *outlen = 0;
727
0
  *inlen = 0;
728
0
  return(0);
729
1.29k
    }
730
871k
    return (UTF8ToUTF16LE(outb, outlen, in, inlen));
731
873k
}
732
#endif /* LIBXML_OUTPUT_ENABLED */
733
734
/**
735
 * UTF16BEToUTF8:
736
 * @out:  a pointer to an array of bytes to store the result
737
 * @outlen:  the length of @out
738
 * @inb:  a pointer to an array of UTF-16 passed as a byte array
739
 * @inlenb:  the length of @in in UTF-16 chars
740
 *
741
 * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
742
 * block of chars out. This function assumes the endian property
743
 * is the same between the native type of this machine and the
744
 * inputed one.
745
 *
746
 * Returns the number of bytes written, or -1 if lack of space, or -2
747
 *     if the transcoding fails (if *in is not a valid utf16 string)
748
 * The value of *inlen after return is the number of octets consumed
749
 *     if the return value is positive, else unpredictable.
750
 */
751
static int
752
UTF16BEToUTF8(unsigned char* out, int *outlen,
753
            const unsigned char* inb, int *inlenb)
754
174k
{
755
174k
    unsigned char* outstart = out;
756
174k
    const unsigned char* processed = inb;
757
174k
    unsigned char* outend;
758
174k
    unsigned short* in = (unsigned short*) inb;
759
174k
    unsigned short* inend;
760
174k
    unsigned int c, d, inlen;
761
174k
    unsigned char *tmp;
762
174k
    int bits;
763
764
174k
    if (*outlen == 0) {
765
0
        *inlenb = 0;
766
0
        return(0);
767
0
    }
768
174k
    outend = out + *outlen;
769
174k
    if ((*inlenb % 2) == 1)
770
70.0k
        (*inlenb)--;
771
174k
    inlen = *inlenb / 2;
772
174k
    inend= in + inlen;
773
8.43M
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
774
8.26M
  if (xmlLittleEndian) {
775
8.26M
      tmp = (unsigned char *) in;
776
8.26M
      c = *tmp++;
777
8.26M
      c = (c << 8) | *tmp;
778
8.26M
      in++;
779
8.26M
  } else {
780
0
      c= *in++;
781
0
  }
782
8.26M
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
783
4.82k
      if (in >= inend) {           /* handle split mutli-byte characters */
784
2.34k
                break;
785
2.34k
      }
786
2.47k
      if (xmlLittleEndian) {
787
2.47k
    tmp = (unsigned char *) in;
788
2.47k
    d = *tmp++;
789
2.47k
    d = (d << 8) | *tmp;
790
2.47k
    in++;
791
2.47k
      } else {
792
0
    d= *in++;
793
0
      }
794
2.47k
            if ((d & 0xFC00) == 0xDC00) {
795
1.05k
                c &= 0x03FF;
796
1.05k
                c <<= 10;
797
1.05k
                c |= d & 0x03FF;
798
1.05k
                c += 0x10000;
799
1.05k
            }
800
1.42k
            else {
801
1.42k
    *outlen = out - outstart;
802
1.42k
    *inlenb = processed - inb;
803
1.42k
          return(-2);
804
1.42k
      }
805
2.47k
        }
806
807
  /* assertion: c is a single UTF-4 value */
808
8.25M
        if (out >= outend)
809
0
      break;
810
8.25M
        if      (c <    0x80) {  *out++=  c;                bits= -6; }
811
8.20M
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
812
8.10M
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
813
1.05k
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
814
815
24.5M
        for ( ; bits >= 0; bits-= 6) {
816
16.3M
            if (out >= outend)
817
0
          break;
818
16.3M
            *out++= ((c >> bits) & 0x3F) | 0x80;
819
16.3M
        }
820
8.25M
  processed = (const unsigned char*) in;
821
8.25M
    }
822
173k
    *outlen = out - outstart;
823
173k
    *inlenb = processed - inb;
824
173k
    return(*outlen);
825
174k
}
826
827
#ifdef LIBXML_OUTPUT_ENABLED
828
/**
829
 * UTF8ToUTF16BE:
830
 * @outb:  a pointer to an array of bytes to store the result
831
 * @outlen:  the length of @outb
832
 * @in:  a pointer to an array of UTF-8 chars
833
 * @inlen:  the length of @in
834
 *
835
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
836
 * block of chars out.
837
 *
838
 * Returns the number of byte written, or -1 by lack of space, or -2
839
 *     if the transcoding failed.
840
 */
841
static int
842
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
843
            const unsigned char* in, int *inlen)
844
238
{
845
238
    unsigned short* out = (unsigned short*) outb;
846
238
    const unsigned char* processed = in;
847
238
    const unsigned char *const instart = in;
848
238
    unsigned short* outstart= out;
849
238
    unsigned short* outend;
850
238
    const unsigned char* inend;
851
238
    unsigned int c, d;
852
238
    int trailing;
853
238
    unsigned char *tmp;
854
238
    unsigned short tmp1, tmp2;
855
856
    /* UTF-16BE has no BOM */
857
238
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
858
238
    if (in == NULL) {
859
119
  *outlen = 0;
860
119
  *inlen = 0;
861
119
  return(0);
862
119
    }
863
119
    inend= in + *inlen;
864
119
    outend = out + (*outlen / 2);
865
7.66k
    while (in < inend) {
866
7.54k
      d= *in++;
867
7.54k
      if      (d < 0x80)  { c= d; trailing= 0; }
868
0
      else if (d < 0xC0)  {
869
          /* trailing byte in leading position */
870
0
    *outlen = out - outstart;
871
0
    *inlen = processed - instart;
872
0
    return(-2);
873
0
      } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
874
0
      else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
875
0
      else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
876
0
      else {
877
          /* no chance for this in UTF-16 */
878
0
    *outlen = out - outstart;
879
0
    *inlen = processed - instart;
880
0
    return(-2);
881
0
      }
882
883
7.54k
      if (inend - in < trailing) {
884
0
          break;
885
0
      }
886
887
7.54k
      for ( ; trailing; trailing--) {
888
0
          if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))  break;
889
0
          c <<= 6;
890
0
          c |= d & 0x3F;
891
0
      }
892
893
      /* assertion: c is a single UTF-4 value */
894
7.54k
        if (c < 0x10000) {
895
7.54k
            if (out >= outend)  break;
896
7.54k
      if (xmlLittleEndian) {
897
7.54k
    tmp = (unsigned char *) out;
898
7.54k
    *tmp = c >> 8;
899
7.54k
    *(tmp + 1) = (unsigned char) c; /* Explicit truncation */
900
7.54k
    out++;
901
7.54k
      } else {
902
0
    *out++ = c;
903
0
      }
904
7.54k
        }
905
0
        else if (c < 0x110000) {
906
0
            if (out+1 >= outend)  break;
907
0
            c -= 0x10000;
908
0
      if (xmlLittleEndian) {
909
0
    tmp1 = 0xD800 | (c >> 10);
910
0
    tmp = (unsigned char *) out;
911
0
    *tmp = tmp1 >> 8;
912
0
    *(tmp + 1) = (unsigned char) tmp1; /* Explicit truncation */
913
0
    out++;
914
915
0
    tmp2 = 0xDC00 | (c & 0x03FF);
916
0
    tmp = (unsigned char *) out;
917
0
    *tmp = tmp2 >> 8;
918
0
    *(tmp + 1) = (unsigned char) tmp2; /* Explicit truncation */
919
0
    out++;
920
0
      } else {
921
0
    *out++ = 0xD800 | (c >> 10);
922
0
    *out++ = 0xDC00 | (c & 0x03FF);
923
0
      }
924
0
        }
925
0
        else
926
0
      break;
927
7.54k
  processed = in;
928
7.54k
    }
929
119
    *outlen = (out - outstart) * 2;
930
119
    *inlen = processed - instart;
931
119
    return(*outlen);
932
119
}
933
#endif /* LIBXML_OUTPUT_ENABLED */
934
935
/************************************************************************
936
 *                  *
937
 *    Generic encoding handling routines      *
938
 *                  *
939
 ************************************************************************/
940
941
/**
942
 * xmlDetectCharEncoding:
943
 * @in:  a pointer to the first bytes of the XML entity, must be at least
944
 *       2 bytes long (at least 4 if encoding is UTF4 variant).
945
 * @len:  pointer to the length of the buffer
946
 *
947
 * Guess the encoding of the entity using the first bytes of the entity content
948
 * according to the non-normative appendix F of the XML-1.0 recommendation.
949
 *
950
 * Returns one of the XML_CHAR_ENCODING_... values.
951
 */
952
xmlCharEncoding
953
xmlDetectCharEncoding(const unsigned char* in, int len)
954
5.93M
{
955
5.93M
    if (in == NULL)
956
0
        return(XML_CHAR_ENCODING_NONE);
957
5.93M
    if (len >= 4) {
958
5.93M
  if ((in[0] == 0x00) && (in[1] == 0x00) &&
959
5.93M
      (in[2] == 0x00) && (in[3] == 0x3C))
960
488
      return(XML_CHAR_ENCODING_UCS4BE);
961
5.93M
  if ((in[0] == 0x3C) && (in[1] == 0x00) &&
962
5.93M
      (in[2] == 0x00) && (in[3] == 0x00))
963
2.33k
      return(XML_CHAR_ENCODING_UCS4LE);
964
5.93M
  if ((in[0] == 0x00) && (in[1] == 0x00) &&
965
5.93M
      (in[2] == 0x3C) && (in[3] == 0x00))
966
130
      return(XML_CHAR_ENCODING_UCS4_2143);
967
5.93M
  if ((in[0] == 0x00) && (in[1] == 0x3C) &&
968
5.93M
      (in[2] == 0x00) && (in[3] == 0x00))
969
159
      return(XML_CHAR_ENCODING_UCS4_3412);
970
5.93M
  if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
971
5.93M
      (in[2] == 0xA7) && (in[3] == 0x94))
972
13.3k
      return(XML_CHAR_ENCODING_EBCDIC);
973
5.92M
  if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
974
5.92M
      (in[2] == 0x78) && (in[3] == 0x6D))
975
625k
      return(XML_CHAR_ENCODING_UTF8);
976
  /*
977
   * Although not part of the recommendation, we also
978
   * attempt an "auto-recognition" of UTF-16LE and
979
   * UTF-16BE encodings.
980
   */
981
5.29M
  if ((in[0] == 0x3C) && (in[1] == 0x00) &&
982
5.29M
      (in[2] == 0x3F) && (in[3] == 0x00))
983
970
      return(XML_CHAR_ENCODING_UTF16LE);
984
5.29M
  if ((in[0] == 0x00) && (in[1] == 0x3C) &&
985
5.29M
      (in[2] == 0x00) && (in[3] == 0x3F))
986
936
      return(XML_CHAR_ENCODING_UTF16BE);
987
5.29M
    }
988
5.29M
    if (len >= 3) {
989
  /*
990
   * Errata on XML-1.0 June 20 2001
991
   * We now allow an UTF8 encoded BOM
992
   */
993
5.29M
  if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
994
5.29M
      (in[2] == 0xBF))
995
4.61k
      return(XML_CHAR_ENCODING_UTF8);
996
5.29M
    }
997
    /* For UTF-16 we can recognize by the BOM */
998
5.29M
    if (len >= 2) {
999
5.29M
  if ((in[0] == 0xFE) && (in[1] == 0xFF))
1000
5.19k
      return(XML_CHAR_ENCODING_UTF16BE);
1001
5.28M
  if ((in[0] == 0xFF) && (in[1] == 0xFE))
1002
7.52k
      return(XML_CHAR_ENCODING_UTF16LE);
1003
5.28M
    }
1004
5.27M
    return(XML_CHAR_ENCODING_NONE);
1005
5.29M
}
1006
1007
/**
1008
 * xmlCleanupEncodingAliases:
1009
 *
1010
 * Unregisters all aliases
1011
 */
1012
void
1013
0
xmlCleanupEncodingAliases(void) {
1014
0
    int i;
1015
1016
0
    if (xmlCharEncodingAliases == NULL)
1017
0
  return;
1018
1019
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1020
0
  if (xmlCharEncodingAliases[i].name != NULL)
1021
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1022
0
  if (xmlCharEncodingAliases[i].alias != NULL)
1023
0
      xmlFree((char *) xmlCharEncodingAliases[i].alias);
1024
0
    }
1025
0
    xmlCharEncodingAliasesNb = 0;
1026
0
    xmlCharEncodingAliasesMax = 0;
1027
0
    xmlFree(xmlCharEncodingAliases);
1028
0
    xmlCharEncodingAliases = NULL;
1029
0
}
1030
1031
/**
1032
 * xmlGetEncodingAlias:
1033
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1034
 *
1035
 * Lookup an encoding name for the given alias.
1036
 *
1037
 * Returns NULL if not found, otherwise the original name
1038
 */
1039
const char *
1040
323k
xmlGetEncodingAlias(const char *alias) {
1041
323k
    int i;
1042
323k
    char upper[100];
1043
1044
323k
    if (alias == NULL)
1045
0
  return(NULL);
1046
1047
323k
    if (xmlCharEncodingAliases == NULL)
1048
323k
  return(NULL);
1049
1050
0
    for (i = 0;i < 99;i++) {
1051
0
        upper[i] = toupper(alias[i]);
1052
0
  if (upper[i] == 0) break;
1053
0
    }
1054
0
    upper[i] = 0;
1055
1056
    /*
1057
     * Walk down the list looking for a definition of the alias
1058
     */
1059
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1060
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1061
0
      return(xmlCharEncodingAliases[i].name);
1062
0
  }
1063
0
    }
1064
0
    return(NULL);
1065
0
}
1066
1067
/**
1068
 * xmlAddEncodingAlias:
1069
 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1070
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1071
 *
1072
 * Registers an alias @alias for an encoding named @name. Existing alias
1073
 * will be overwritten.
1074
 *
1075
 * Returns 0 in case of success, -1 in case of error
1076
 */
1077
int
1078
0
xmlAddEncodingAlias(const char *name, const char *alias) {
1079
0
    int i;
1080
0
    char upper[100];
1081
1082
0
    if ((name == NULL) || (alias == NULL))
1083
0
  return(-1);
1084
1085
0
    for (i = 0;i < 99;i++) {
1086
0
        upper[i] = toupper(alias[i]);
1087
0
  if (upper[i] == 0) break;
1088
0
    }
1089
0
    upper[i] = 0;
1090
1091
0
    if (xmlCharEncodingAliases == NULL) {
1092
0
  xmlCharEncodingAliasesNb = 0;
1093
0
  xmlCharEncodingAliasesMax = 20;
1094
0
  xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1095
0
        xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1096
0
  if (xmlCharEncodingAliases == NULL)
1097
0
      return(-1);
1098
0
    } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1099
0
  xmlCharEncodingAliasesMax *= 2;
1100
0
  xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1101
0
        xmlRealloc(xmlCharEncodingAliases,
1102
0
             xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1103
0
    }
1104
    /*
1105
     * Walk down the list looking for a definition of the alias
1106
     */
1107
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1108
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1109
      /*
1110
       * Replace the definition.
1111
       */
1112
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1113
0
      xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1114
0
      return(0);
1115
0
  }
1116
0
    }
1117
    /*
1118
     * Add the definition
1119
     */
1120
0
    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1121
0
    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1122
0
    xmlCharEncodingAliasesNb++;
1123
0
    return(0);
1124
0
}
1125
1126
/**
1127
 * xmlDelEncodingAlias:
1128
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1129
 *
1130
 * Unregisters an encoding alias @alias
1131
 *
1132
 * Returns 0 in case of success, -1 in case of error
1133
 */
1134
int
1135
0
xmlDelEncodingAlias(const char *alias) {
1136
0
    int i;
1137
1138
0
    if (alias == NULL)
1139
0
  return(-1);
1140
1141
0
    if (xmlCharEncodingAliases == NULL)
1142
0
  return(-1);
1143
    /*
1144
     * Walk down the list looking for a definition of the alias
1145
     */
1146
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1147
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1148
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1149
0
      xmlFree((char *) xmlCharEncodingAliases[i].alias);
1150
0
      xmlCharEncodingAliasesNb--;
1151
0
      memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1152
0
        sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1153
0
      return(0);
1154
0
  }
1155
0
    }
1156
0
    return(-1);
1157
0
}
1158
1159
/**
1160
 * xmlParseCharEncoding:
1161
 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1162
 *
1163
 * Compare the string to the encoding schemes already known. Note
1164
 * that the comparison is case insensitive accordingly to the section
1165
 * [XML] 4.3.3 Character Encoding in Entities.
1166
 *
1167
 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1168
 * if not recognized.
1169
 */
1170
xmlCharEncoding
1171
xmlParseCharEncoding(const char* name)
1172
311k
{
1173
311k
    const char *alias;
1174
311k
    char upper[500];
1175
311k
    int i;
1176
1177
311k
    if (name == NULL)
1178
247k
  return(XML_CHAR_ENCODING_NONE);
1179
1180
    /*
1181
     * Do the alias resolution
1182
     */
1183
64.3k
    alias = xmlGetEncodingAlias(name);
1184
64.3k
    if (alias != NULL)
1185
0
  name = alias;
1186
1187
654k
    for (i = 0;i < 499;i++) {
1188
654k
        upper[i] = toupper(name[i]);
1189
654k
  if (upper[i] == 0) break;
1190
654k
    }
1191
64.3k
    upper[i] = 0;
1192
1193
64.3k
    if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1194
64.1k
    if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1195
54.7k
    if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1196
1197
    /*
1198
     * NOTE: if we were able to parse this, the endianness of UTF16 is
1199
     *       already found and in use
1200
     */
1201
53.9k
    if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1202
52.6k
    if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1203
1204
52.6k
    if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1205
52.6k
    if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1206
52.5k
    if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1207
1208
    /*
1209
     * NOTE: if we were able to parse this, the endianness of UCS4 is
1210
     *       already found and in use
1211
     */
1212
52.5k
    if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1213
49.2k
    if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1214
49.2k
    if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1215
1216
1217
49.2k
    if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1218
43.0k
    if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1219
43.0k
    if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1220
1221
43.0k
    if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1222
42.8k
    if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1223
42.8k
    if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1224
1225
42.8k
    if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1226
42.6k
    if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1227
42.4k
    if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1228
40.9k
    if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1229
40.7k
    if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1230
40.3k
    if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1231
40.2k
    if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1232
1233
40.0k
    if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1234
40.0k
    if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1235
40.0k
    if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1236
1237
#ifdef DEBUG_ENCODING
1238
    xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1239
#endif
1240
40.0k
    return(XML_CHAR_ENCODING_ERROR);
1241
40.0k
}
1242
1243
/**
1244
 * xmlGetCharEncodingName:
1245
 * @enc:  the encoding
1246
 *
1247
 * The "canonical" name for XML encoding.
1248
 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1249
 * Section 4.3.3  Character Encoding in Entities
1250
 *
1251
 * Returns the canonical name for the given encoding
1252
 */
1253
1254
const char*
1255
3.62k
xmlGetCharEncodingName(xmlCharEncoding enc) {
1256
3.62k
    switch (enc) {
1257
0
        case XML_CHAR_ENCODING_ERROR:
1258
0
      return(NULL);
1259
0
        case XML_CHAR_ENCODING_NONE:
1260
0
      return(NULL);
1261
0
        case XML_CHAR_ENCODING_UTF8:
1262
0
      return("UTF-8");
1263
0
        case XML_CHAR_ENCODING_UTF16LE:
1264
0
      return("UTF-16");
1265
0
        case XML_CHAR_ENCODING_UTF16BE:
1266
0
      return("UTF-16");
1267
0
        case XML_CHAR_ENCODING_EBCDIC:
1268
0
            return("EBCDIC");
1269
3.33k
        case XML_CHAR_ENCODING_UCS4LE:
1270
3.33k
            return("ISO-10646-UCS-4");
1271
0
        case XML_CHAR_ENCODING_UCS4BE:
1272
0
            return("ISO-10646-UCS-4");
1273
130
        case XML_CHAR_ENCODING_UCS4_2143:
1274
130
            return("ISO-10646-UCS-4");
1275
159
        case XML_CHAR_ENCODING_UCS4_3412:
1276
159
            return("ISO-10646-UCS-4");
1277
0
        case XML_CHAR_ENCODING_UCS2:
1278
0
            return("ISO-10646-UCS-2");
1279
0
        case XML_CHAR_ENCODING_8859_1:
1280
0
      return("ISO-8859-1");
1281
0
        case XML_CHAR_ENCODING_8859_2:
1282
0
      return("ISO-8859-2");
1283
0
        case XML_CHAR_ENCODING_8859_3:
1284
0
      return("ISO-8859-3");
1285
0
        case XML_CHAR_ENCODING_8859_4:
1286
0
      return("ISO-8859-4");
1287
0
        case XML_CHAR_ENCODING_8859_5:
1288
0
      return("ISO-8859-5");
1289
0
        case XML_CHAR_ENCODING_8859_6:
1290
0
      return("ISO-8859-6");
1291
0
        case XML_CHAR_ENCODING_8859_7:
1292
0
      return("ISO-8859-7");
1293
0
        case XML_CHAR_ENCODING_8859_8:
1294
0
      return("ISO-8859-8");
1295
0
        case XML_CHAR_ENCODING_8859_9:
1296
0
      return("ISO-8859-9");
1297
0
        case XML_CHAR_ENCODING_2022_JP:
1298
0
            return("ISO-2022-JP");
1299
0
        case XML_CHAR_ENCODING_SHIFT_JIS:
1300
0
            return("Shift-JIS");
1301
0
        case XML_CHAR_ENCODING_EUC_JP:
1302
0
            return("EUC-JP");
1303
0
  case XML_CHAR_ENCODING_ASCII:
1304
0
      return(NULL);
1305
3.62k
    }
1306
0
    return(NULL);
1307
3.62k
}
1308
1309
/************************************************************************
1310
 *                  *
1311
 *      Char encoding handlers        *
1312
 *                  *
1313
 ************************************************************************/
1314
1315
1316
/* the size should be growable, but it's not a big deal ... */
1317
108k
#define MAX_ENCODING_HANDLERS 50
1318
static xmlCharEncodingHandlerPtr *handlers = NULL;
1319
static int nbCharEncodingHandler = 0;
1320
1321
/*
1322
 * The default is UTF-8 for XML, that's also the default used for the
1323
 * parser internals, so the default encoding handler is NULL
1324
 */
1325
1326
static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1327
1328
/**
1329
 * xmlNewCharEncodingHandler:
1330
 * @name:  the encoding name, in UTF-8 format (ASCII actually)
1331
 * @input:  the xmlCharEncodingInputFunc to read that encoding
1332
 * @output:  the xmlCharEncodingOutputFunc to write that encoding
1333
 *
1334
 * Create and registers an xmlCharEncodingHandler.
1335
 *
1336
 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1337
 */
1338
xmlCharEncodingHandlerPtr
1339
xmlNewCharEncodingHandler(const char *name,
1340
                          xmlCharEncodingInputFunc input,
1341
96.0k
                          xmlCharEncodingOutputFunc output) {
1342
96.0k
    xmlCharEncodingHandlerPtr handler;
1343
96.0k
    const char *alias;
1344
96.0k
    char upper[500];
1345
96.0k
    int i;
1346
96.0k
    char *up = NULL;
1347
1348
    /*
1349
     * Do the alias resolution
1350
     */
1351
96.0k
    alias = xmlGetEncodingAlias(name);
1352
96.0k
    if (alias != NULL)
1353
0
  name = alias;
1354
1355
    /*
1356
     * Keep only the uppercase version of the encoding.
1357
     */
1358
96.0k
    if (name == NULL) {
1359
0
        xmlEncodingErr(XML_I18N_NO_NAME,
1360
0
           "xmlNewCharEncodingHandler : no name !\n", NULL);
1361
0
  return(NULL);
1362
0
    }
1363
744k
    for (i = 0;i < 499;i++) {
1364
744k
        upper[i] = toupper(name[i]);
1365
744k
  if (upper[i] == 0) break;
1366
744k
    }
1367
96.0k
    upper[i] = 0;
1368
96.0k
    up = xmlMemStrdup(upper);
1369
96.0k
    if (up == NULL) {
1370
0
        xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1371
0
  return(NULL);
1372
0
    }
1373
1374
    /*
1375
     * allocate and fill-up an handler block.
1376
     */
1377
96.0k
    handler = (xmlCharEncodingHandlerPtr)
1378
96.0k
              xmlMalloc(sizeof(xmlCharEncodingHandler));
1379
96.0k
    if (handler == NULL) {
1380
0
        xmlFree(up);
1381
0
        xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1382
0
  return(NULL);
1383
0
    }
1384
96.0k
    memset(handler, 0, sizeof(xmlCharEncodingHandler));
1385
96.0k
    handler->input = input;
1386
96.0k
    handler->output = output;
1387
96.0k
    handler->name = up;
1388
1389
96.0k
#ifdef LIBXML_ICONV_ENABLED
1390
96.0k
    handler->iconv_in = NULL;
1391
96.0k
    handler->iconv_out = NULL;
1392
96.0k
#endif
1393
#ifdef LIBXML_ICU_ENABLED
1394
    handler->uconv_in = NULL;
1395
    handler->uconv_out = NULL;
1396
#endif
1397
1398
    /*
1399
     * registers and returns the handler.
1400
     */
1401
96.0k
    xmlRegisterCharEncodingHandler(handler);
1402
#ifdef DEBUG_ENCODING
1403
    xmlGenericError(xmlGenericErrorContext,
1404
      "Registered encoding handler for %s\n", name);
1405
#endif
1406
96.0k
    return(handler);
1407
96.0k
}
1408
1409
/**
1410
 * xmlInitCharEncodingHandlers:
1411
 *
1412
 * DEPRECATED: This function will be made private. Call xmlInitParser to
1413
 * initialize the library.
1414
 *
1415
 * Initialize the char encoding support, it registers the default
1416
 * encoding supported.
1417
 * NOTE: while public, this function usually doesn't need to be called
1418
 *       in normal processing.
1419
 */
1420
void
1421
12.0k
xmlInitCharEncodingHandlers(void) {
1422
12.0k
    unsigned short int tst = 0x1234;
1423
12.0k
    unsigned char *ptr = (unsigned char *) &tst;
1424
1425
12.0k
    if (handlers != NULL) return;
1426
1427
12.0k
    handlers = (xmlCharEncodingHandlerPtr *)
1428
12.0k
        xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1429
1430
12.0k
    if (*ptr == 0x12) xmlLittleEndian = 0;
1431
12.0k
    else if (*ptr == 0x34) xmlLittleEndian = 1;
1432
0
    else {
1433
0
        xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1434
0
                 "Odd problem at endianness detection\n", NULL);
1435
0
    }
1436
1437
12.0k
    if (handlers == NULL) {
1438
0
        xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
1439
0
  return;
1440
0
    }
1441
12.0k
    xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
1442
12.0k
#ifdef LIBXML_OUTPUT_ENABLED
1443
12.0k
    xmlUTF16LEHandler =
1444
12.0k
          xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1445
12.0k
    xmlUTF16BEHandler =
1446
12.0k
          xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
1447
12.0k
    xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
1448
12.0k
    xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1449
12.0k
    xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
1450
12.0k
    xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
1451
12.0k
#ifdef LIBXML_HTML_ENABLED
1452
12.0k
    xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1453
12.0k
#endif
1454
#else
1455
    xmlUTF16LEHandler =
1456
          xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1457
    xmlUTF16BEHandler =
1458
          xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
1459
    xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
1460
    xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1461
    xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1462
    xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1463
#endif /* LIBXML_OUTPUT_ENABLED */
1464
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
1465
#ifdef LIBXML_ISO8859X_ENABLED
1466
    xmlRegisterCharEncodingHandlersISO8859x ();
1467
#endif
1468
#endif
1469
1470
12.0k
}
1471
1472
/**
1473
 * xmlCleanupCharEncodingHandlers:
1474
 *
1475
 * DEPRECATED: This function will be made private. Call xmlCleanupParser
1476
 * to free global state but see the warnings there. xmlCleanupParser
1477
 * should be only called once at program exit. In most cases, you don't
1478
 * have call cleanup functions at all.
1479
 *
1480
 * Cleanup the memory allocated for the char encoding support, it
1481
 * unregisters all the encoding handlers and the aliases.
1482
 */
1483
void
1484
0
xmlCleanupCharEncodingHandlers(void) {
1485
0
    xmlCleanupEncodingAliases();
1486
1487
0
    if (handlers == NULL) return;
1488
1489
0
    for (;nbCharEncodingHandler > 0;) {
1490
0
        nbCharEncodingHandler--;
1491
0
  if (handlers[nbCharEncodingHandler] != NULL) {
1492
0
      if (handlers[nbCharEncodingHandler]->name != NULL)
1493
0
    xmlFree(handlers[nbCharEncodingHandler]->name);
1494
0
      xmlFree(handlers[nbCharEncodingHandler]);
1495
0
  }
1496
0
    }
1497
0
    xmlFree(handlers);
1498
0
    handlers = NULL;
1499
0
    nbCharEncodingHandler = 0;
1500
0
    xmlDefaultCharEncodingHandler = NULL;
1501
0
}
1502
1503
/**
1504
 * xmlRegisterCharEncodingHandler:
1505
 * @handler:  the xmlCharEncodingHandlerPtr handler block
1506
 *
1507
 * Register the char encoding handler, surprising, isn't it ?
1508
 */
1509
void
1510
96.0k
xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1511
96.0k
    if (handlers == NULL) xmlInitCharEncodingHandlers();
1512
96.0k
    if ((handler == NULL) || (handlers == NULL)) {
1513
0
        xmlEncodingErr(XML_I18N_NO_HANDLER,
1514
0
    "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
1515
0
        goto free_handler;
1516
0
    }
1517
1518
96.0k
    if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1519
0
        xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1520
0
  "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1521
0
                 "MAX_ENCODING_HANDLERS");
1522
0
        goto free_handler;
1523
0
    }
1524
96.0k
    handlers[nbCharEncodingHandler++] = handler;
1525
96.0k
    return;
1526
1527
0
free_handler:
1528
0
    if (handler != NULL) {
1529
0
        if (handler->name != NULL) {
1530
0
            xmlFree(handler->name);
1531
0
        }
1532
0
        xmlFree(handler);
1533
0
    }
1534
0
}
1535
1536
/**
1537
 * xmlGetCharEncodingHandler:
1538
 * @enc:  an xmlCharEncoding value.
1539
 *
1540
 * Search in the registered set the handler able to read/write that encoding.
1541
 *
1542
 * Returns the handler or NULL if not found
1543
 */
1544
xmlCharEncodingHandlerPtr
1545
6.77M
xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1546
6.77M
    xmlCharEncodingHandlerPtr handler;
1547
1548
6.77M
    if (handlers == NULL) xmlInitCharEncodingHandlers();
1549
6.77M
    switch (enc) {
1550
0
        case XML_CHAR_ENCODING_ERROR:
1551
0
      return(NULL);
1552
6.54M
        case XML_CHAR_ENCODING_NONE:
1553
6.54M
      return(NULL);
1554
190k
        case XML_CHAR_ENCODING_UTF8:
1555
190k
      return(NULL);
1556
11.3k
        case XML_CHAR_ENCODING_UTF16LE:
1557
11.3k
      return(xmlUTF16LEHandler);
1558
7.99k
        case XML_CHAR_ENCODING_UTF16BE:
1559
7.99k
      return(xmlUTF16BEHandler);
1560
17.7k
        case XML_CHAR_ENCODING_EBCDIC:
1561
17.7k
            handler = xmlFindCharEncodingHandler("EBCDIC");
1562
17.7k
            if (handler != NULL) return(handler);
1563
17.7k
            handler = xmlFindCharEncodingHandler("ebcdic");
1564
17.7k
            if (handler != NULL) return(handler);
1565
17.7k
            handler = xmlFindCharEncodingHandler("EBCDIC-US");
1566
17.7k
            if (handler != NULL) return(handler);
1567
0
            handler = xmlFindCharEncodingHandler("IBM-037");
1568
0
            if (handler != NULL) return(handler);
1569
0
      break;
1570
627
        case XML_CHAR_ENCODING_UCS4BE:
1571
627
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1572
627
            if (handler != NULL) return(handler);
1573
627
            handler = xmlFindCharEncodingHandler("UCS-4");
1574
627
            if (handler != NULL) return(handler);
1575
0
            handler = xmlFindCharEncodingHandler("UCS4");
1576
0
            if (handler != NULL) return(handler);
1577
0
      break;
1578
2.70k
        case XML_CHAR_ENCODING_UCS4LE:
1579
2.70k
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1580
2.70k
            if (handler != NULL) return(handler);
1581
2.70k
            handler = xmlFindCharEncodingHandler("UCS-4");
1582
2.70k
            if (handler != NULL) return(handler);
1583
0
            handler = xmlFindCharEncodingHandler("UCS4");
1584
0
            if (handler != NULL) return(handler);
1585
0
      break;
1586
167
        case XML_CHAR_ENCODING_UCS4_2143:
1587
167
      break;
1588
192
        case XML_CHAR_ENCODING_UCS4_3412:
1589
192
      break;
1590
0
        case XML_CHAR_ENCODING_UCS2:
1591
0
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1592
0
            if (handler != NULL) return(handler);
1593
0
            handler = xmlFindCharEncodingHandler("UCS-2");
1594
0
            if (handler != NULL) return(handler);
1595
0
            handler = xmlFindCharEncodingHandler("UCS2");
1596
0
            if (handler != NULL) return(handler);
1597
0
      break;
1598
1599
      /*
1600
       * We used to keep ISO Latin encodings native in the
1601
       * generated data. This led to so many problems that
1602
       * this has been removed. One can still change this
1603
       * back by registering no-ops encoders for those
1604
       */
1605
943
        case XML_CHAR_ENCODING_8859_1:
1606
943
      handler = xmlFindCharEncodingHandler("ISO-8859-1");
1607
943
      if (handler != NULL) return(handler);
1608
0
      break;
1609
45
        case XML_CHAR_ENCODING_8859_2:
1610
45
      handler = xmlFindCharEncodingHandler("ISO-8859-2");
1611
45
      if (handler != NULL) return(handler);
1612
0
      break;
1613
51
        case XML_CHAR_ENCODING_8859_3:
1614
51
      handler = xmlFindCharEncodingHandler("ISO-8859-3");
1615
51
      if (handler != NULL) return(handler);
1616
0
      break;
1617
34
        case XML_CHAR_ENCODING_8859_4:
1618
34
      handler = xmlFindCharEncodingHandler("ISO-8859-4");
1619
34
      if (handler != NULL) return(handler);
1620
0
      break;
1621
65
        case XML_CHAR_ENCODING_8859_5:
1622
65
      handler = xmlFindCharEncodingHandler("ISO-8859-5");
1623
65
      if (handler != NULL) return(handler);
1624
0
      break;
1625
49
        case XML_CHAR_ENCODING_8859_6:
1626
49
      handler = xmlFindCharEncodingHandler("ISO-8859-6");
1627
49
      if (handler != NULL) return(handler);
1628
0
      break;
1629
68
        case XML_CHAR_ENCODING_8859_7:
1630
68
      handler = xmlFindCharEncodingHandler("ISO-8859-7");
1631
68
      if (handler != NULL) return(handler);
1632
0
      break;
1633
23
        case XML_CHAR_ENCODING_8859_8:
1634
23
      handler = xmlFindCharEncodingHandler("ISO-8859-8");
1635
23
      if (handler != NULL) return(handler);
1636
0
      break;
1637
34
        case XML_CHAR_ENCODING_8859_9:
1638
34
      handler = xmlFindCharEncodingHandler("ISO-8859-9");
1639
34
      if (handler != NULL) return(handler);
1640
0
      break;
1641
1642
1643
0
        case XML_CHAR_ENCODING_2022_JP:
1644
0
            handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1645
0
            if (handler != NULL) return(handler);
1646
0
      break;
1647
0
        case XML_CHAR_ENCODING_SHIFT_JIS:
1648
0
            handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1649
0
            if (handler != NULL) return(handler);
1650
0
            handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1651
0
            if (handler != NULL) return(handler);
1652
0
            handler = xmlFindCharEncodingHandler("Shift_JIS");
1653
0
            if (handler != NULL) return(handler);
1654
0
      break;
1655
0
        case XML_CHAR_ENCODING_EUC_JP:
1656
0
            handler = xmlFindCharEncodingHandler("EUC-JP");
1657
0
            if (handler != NULL) return(handler);
1658
0
      break;
1659
0
  default:
1660
0
      break;
1661
6.77M
    }
1662
1663
#ifdef DEBUG_ENCODING
1664
    xmlGenericError(xmlGenericErrorContext,
1665
      "No handler found for encoding %d\n", enc);
1666
#endif
1667
359
    return(NULL);
1668
6.77M
}
1669
1670
/**
1671
 * xmlFindCharEncodingHandler:
1672
 * @name:  a string describing the char encoding.
1673
 *
1674
 * Search in the registered set the handler able to read/write that encoding.
1675
 *
1676
 * Returns the handler or NULL if not found
1677
 */
1678
xmlCharEncodingHandlerPtr
1679
162k
xmlFindCharEncodingHandler(const char *name) {
1680
162k
    const char *nalias;
1681
162k
    const char *norig;
1682
162k
    xmlCharEncoding alias;
1683
162k
#ifdef LIBXML_ICONV_ENABLED
1684
162k
    xmlCharEncodingHandlerPtr enc;
1685
162k
    iconv_t icv_in, icv_out;
1686
162k
#endif /* LIBXML_ICONV_ENABLED */
1687
#ifdef LIBXML_ICU_ENABLED
1688
    xmlCharEncodingHandlerPtr encu;
1689
    uconv_t *ucv_in, *ucv_out;
1690
#endif /* LIBXML_ICU_ENABLED */
1691
162k
    char upper[100];
1692
162k
    int i;
1693
1694
162k
    if (handlers == NULL) xmlInitCharEncodingHandlers();
1695
162k
    if (name == NULL) return(xmlDefaultCharEncodingHandler);
1696
162k
    if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1697
1698
    /*
1699
     * Do the alias resolution
1700
     */
1701
162k
    norig = name;
1702
162k
    nalias = xmlGetEncodingAlias(name);
1703
162k
    if (nalias != NULL)
1704
0
  name = nalias;
1705
1706
    /*
1707
     * Check first for directly registered encoding names
1708
     */
1709
1.57M
    for (i = 0;i < 99;i++) {
1710
1.57M
        upper[i] = toupper(name[i]);
1711
1.57M
  if (upper[i] == 0) break;
1712
1.57M
    }
1713
162k
    upper[i] = 0;
1714
1715
162k
    if (handlers != NULL) {
1716
1.13M
        for (i = 0;i < nbCharEncodingHandler; i++) {
1717
1.04M
            if (!strcmp(upper, handlers[i]->name)) {
1718
#ifdef DEBUG_ENCODING
1719
                xmlGenericError(xmlGenericErrorContext,
1720
                        "Found registered handler for encoding %s\n", name);
1721
#endif
1722
77.6k
                return(handlers[i]);
1723
77.6k
            }
1724
1.04M
        }
1725
162k
    }
1726
1727
85.2k
#ifdef LIBXML_ICONV_ENABLED
1728
    /* check whether iconv can handle this */
1729
85.2k
    icv_in = iconv_open("UTF-8", name);
1730
85.2k
    icv_out = iconv_open(name, "UTF-8");
1731
85.2k
    if (icv_in == (iconv_t) -1) {
1732
40.7k
        icv_in = iconv_open("UTF-8", upper);
1733
40.7k
    }
1734
85.2k
    if (icv_out == (iconv_t) -1) {
1735
40.7k
  icv_out = iconv_open(upper, "UTF-8");
1736
40.7k
    }
1737
85.2k
    if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1738
44.4k
      enc = (xmlCharEncodingHandlerPtr)
1739
44.4k
            xmlMalloc(sizeof(xmlCharEncodingHandler));
1740
44.4k
      if (enc == NULL) {
1741
0
          iconv_close(icv_in);
1742
0
          iconv_close(icv_out);
1743
0
    return(NULL);
1744
0
      }
1745
44.4k
            memset(enc, 0, sizeof(xmlCharEncodingHandler));
1746
44.4k
      enc->name = xmlMemStrdup(name);
1747
44.4k
      enc->input = NULL;
1748
44.4k
      enc->output = NULL;
1749
44.4k
      enc->iconv_in = icv_in;
1750
44.4k
      enc->iconv_out = icv_out;
1751
#ifdef DEBUG_ENCODING
1752
            xmlGenericError(xmlGenericErrorContext,
1753
        "Found iconv handler for encoding %s\n", name);
1754
#endif
1755
44.4k
      return enc;
1756
44.4k
    } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1757
0
      xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1758
0
        "iconv : problems with filters for '%s'\n", name);
1759
0
      if (icv_in != (iconv_t) -1)
1760
0
    iconv_close(icv_in);
1761
0
      else
1762
0
    iconv_close(icv_out);
1763
0
    }
1764
40.7k
#endif /* LIBXML_ICONV_ENABLED */
1765
#ifdef LIBXML_ICU_ENABLED
1766
    /* check whether icu can handle this */
1767
    ucv_in = openIcuConverter(name, 1);
1768
    ucv_out = openIcuConverter(name, 0);
1769
    if (ucv_in != NULL && ucv_out != NULL) {
1770
      encu = (xmlCharEncodingHandlerPtr)
1771
             xmlMalloc(sizeof(xmlCharEncodingHandler));
1772
      if (encu == NULL) {
1773
                closeIcuConverter(ucv_in);
1774
                closeIcuConverter(ucv_out);
1775
    return(NULL);
1776
      }
1777
            memset(encu, 0, sizeof(xmlCharEncodingHandler));
1778
      encu->name = xmlMemStrdup(name);
1779
      encu->input = NULL;
1780
      encu->output = NULL;
1781
      encu->uconv_in = ucv_in;
1782
      encu->uconv_out = ucv_out;
1783
#ifdef DEBUG_ENCODING
1784
            xmlGenericError(xmlGenericErrorContext,
1785
        "Found ICU converter handler for encoding %s\n", name);
1786
#endif
1787
      return encu;
1788
    } else if (ucv_in != NULL || ucv_out != NULL) {
1789
            closeIcuConverter(ucv_in);
1790
            closeIcuConverter(ucv_out);
1791
      xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1792
        "ICU converter : problems with filters for '%s'\n", name);
1793
    }
1794
#endif /* LIBXML_ICU_ENABLED */
1795
1796
#ifdef DEBUG_ENCODING
1797
    xmlGenericError(xmlGenericErrorContext,
1798
      "No handler found for encoding %s\n", name);
1799
#endif
1800
1801
    /*
1802
     * Fallback using the canonical names
1803
     */
1804
40.7k
    alias = xmlParseCharEncoding(norig);
1805
40.7k
    if (alias != XML_CHAR_ENCODING_ERROR) {
1806
3.33k
        const char* canon;
1807
3.33k
        canon = xmlGetCharEncodingName(alias);
1808
3.33k
        if ((canon != NULL) && (strcmp(name, canon))) {
1809
0
      return(xmlFindCharEncodingHandler(canon));
1810
0
        }
1811
3.33k
    }
1812
1813
    /* If "none of the above", give up */
1814
40.7k
    return(NULL);
1815
40.7k
}
1816
1817
/************************************************************************
1818
 *                  *
1819
 *    ICONV based generic conversion functions    *
1820
 *                  *
1821
 ************************************************************************/
1822
1823
#ifdef LIBXML_ICONV_ENABLED
1824
/**
1825
 * xmlIconvWrapper:
1826
 * @cd:   iconv converter data structure
1827
 * @out:  a pointer to an array of bytes to store the result
1828
 * @outlen:  the length of @out
1829
 * @in:  a pointer to an array of input bytes
1830
 * @inlen:  the length of @in
1831
 *
1832
 * Returns 0 if success, or
1833
 *     -1 by lack of space, or
1834
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
1835
 *        the result of transformation can't fit into the encoding we want), or
1836
 *     -3 if there the last byte can't form a single output char.
1837
 *
1838
 * The value of @inlen after return is the number of octets consumed
1839
 *     as the return value is positive, else unpredictable.
1840
 * The value of @outlen after return is the number of octets produced.
1841
 */
1842
static int
1843
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
1844
682k
                const unsigned char *in, int *inlen) {
1845
682k
    size_t icv_inlen, icv_outlen;
1846
682k
    const char *icv_in = (const char *) in;
1847
682k
    char *icv_out = (char *) out;
1848
682k
    size_t ret;
1849
1850
682k
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1851
3.06k
        if (outlen != NULL) *outlen = 0;
1852
3.06k
        return(-1);
1853
3.06k
    }
1854
679k
    icv_inlen = *inlen;
1855
679k
    icv_outlen = *outlen;
1856
    /*
1857
     * Some versions take const, other versions take non-const input.
1858
     */
1859
679k
    ret = iconv(cd, (void *) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
1860
679k
    *inlen -= icv_inlen;
1861
679k
    *outlen -= icv_outlen;
1862
679k
    if ((icv_inlen != 0) || (ret == (size_t) -1)) {
1863
159k
#ifdef EILSEQ
1864
159k
        if (errno == EILSEQ) {
1865
152k
            return -2;
1866
152k
        } else
1867
6.41k
#endif
1868
6.41k
#ifdef E2BIG
1869
6.41k
        if (errno == E2BIG) {
1870
461
            return -1;
1871
461
        } else
1872
5.95k
#endif
1873
5.95k
#ifdef EINVAL
1874
5.95k
        if (errno == EINVAL) {
1875
5.95k
            return -3;
1876
5.95k
        } else
1877
0
#endif
1878
0
        {
1879
0
            return -3;
1880
0
        }
1881
159k
    }
1882
520k
    return 0;
1883
679k
}
1884
#endif /* LIBXML_ICONV_ENABLED */
1885
1886
/************************************************************************
1887
 *                  *
1888
 *    ICU based generic conversion functions    *
1889
 *                  *
1890
 ************************************************************************/
1891
1892
#ifdef LIBXML_ICU_ENABLED
1893
/**
1894
 * xmlUconvWrapper:
1895
 * @cd: ICU uconverter data structure
1896
 * @toUnicode : non-zero if toUnicode. 0 otherwise.
1897
 * @out:  a pointer to an array of bytes to store the result
1898
 * @outlen:  the length of @out
1899
 * @in:  a pointer to an array of input bytes
1900
 * @inlen:  the length of @in
1901
 * @flush: if true, indicates end of input
1902
 *
1903
 * Returns 0 if success, or
1904
 *     -1 by lack of space, or
1905
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
1906
 *        the result of transformation can't fit into the encoding we want), or
1907
 *     -3 if there the last byte can't form a single output char.
1908
 *
1909
 * The value of @inlen after return is the number of octets consumed
1910
 *     as the return value is positive, else unpredictable.
1911
 * The value of @outlen after return is the number of octets produced.
1912
 */
1913
static int
1914
xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
1915
                const unsigned char *in, int *inlen, int flush) {
1916
    const char *ucv_in = (const char *) in;
1917
    char *ucv_out = (char *) out;
1918
    UErrorCode err = U_ZERO_ERROR;
1919
1920
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1921
        if (outlen != NULL) *outlen = 0;
1922
        return(-1);
1923
    }
1924
1925
    if (toUnicode) {
1926
        /* encoding => UTF-16 => UTF-8 */
1927
        ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
1928
                       &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1929
                       &cd->pivot_source, &cd->pivot_target,
1930
                       cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1931
    } else {
1932
        /* UTF-8 => UTF-16 => encoding */
1933
        ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
1934
                       &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1935
                       &cd->pivot_source, &cd->pivot_target,
1936
                       cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1937
    }
1938
    *inlen = ucv_in - (const char*) in;
1939
    *outlen = ucv_out - (char *) out;
1940
    if (U_SUCCESS(err)) {
1941
        /* reset pivot buf if this is the last call for input (flush==TRUE) */
1942
        if (flush)
1943
            cd->pivot_source = cd->pivot_target = cd->pivot_buf;
1944
        return 0;
1945
    }
1946
    if (err == U_BUFFER_OVERFLOW_ERROR)
1947
        return -1;
1948
    if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
1949
        return -2;
1950
    return -3;
1951
}
1952
#endif /* LIBXML_ICU_ENABLED */
1953
1954
/************************************************************************
1955
 *                  *
1956
 *    The real API used by libxml for on-the-fly conversion *
1957
 *                  *
1958
 ************************************************************************/
1959
1960
/**
1961
 * xmlEncInputChunk:
1962
 * @handler:  encoding handler
1963
 * @out:  a pointer to an array of bytes to store the result
1964
 * @outlen:  the length of @out
1965
 * @in:  a pointer to an array of input bytes
1966
 * @inlen:  the length of @in
1967
 * @flush:  flush (ICU-related)
1968
 *
1969
 * Returns 0 if success, or
1970
 *     -1 by lack of space, or
1971
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
1972
 *        the result of transformation can't fit into the encoding we want), or
1973
 *     -3 if there the last byte can't form a single output char.
1974
 *
1975
 * The value of @inlen after return is the number of octets consumed
1976
 *     as the return value is 0, else unpredictable.
1977
 * The value of @outlen after return is the number of octets produced.
1978
 */
1979
static int
1980
xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
1981
2.14M
                 int *outlen, const unsigned char *in, int *inlen, int flush) {
1982
2.14M
    int ret;
1983
2.14M
    (void)flush;
1984
1985
2.14M
    if (handler->input != NULL) {
1986
1.73M
        ret = handler->input(out, outlen, in, inlen);
1987
1.73M
        if (ret > 0)
1988
1.32M
           ret = 0;
1989
1.73M
    }
1990
414k
#ifdef LIBXML_ICONV_ENABLED
1991
414k
    else if (handler->iconv_in != NULL) {
1992
414k
        ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
1993
414k
    }
1994
15
#endif /* LIBXML_ICONV_ENABLED */
1995
#ifdef LIBXML_ICU_ENABLED
1996
    else if (handler->uconv_in != NULL) {
1997
        ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen,
1998
                              flush);
1999
    }
2000
#endif /* LIBXML_ICU_ENABLED */
2001
15
    else {
2002
15
        *outlen = 0;
2003
15
        *inlen = 0;
2004
15
        ret = -2;
2005
15
    }
2006
2007
2.14M
    return(ret);
2008
2.14M
}
2009
2010
/**
2011
 * xmlEncOutputChunk:
2012
 * @handler:  encoding handler
2013
 * @out:  a pointer to an array of bytes to store the result
2014
 * @outlen:  the length of @out
2015
 * @in:  a pointer to an array of input bytes
2016
 * @inlen:  the length of @in
2017
 *
2018
 * Returns 0 if success, or
2019
 *     -1 by lack of space, or
2020
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2021
 *        the result of transformation can't fit into the encoding we want), or
2022
 *     -3 if there the last byte can't form a single output char.
2023
 *     -4 if no output function was found.
2024
 *
2025
 * The value of @inlen after return is the number of octets consumed
2026
 *     as the return value is 0, else unpredictable.
2027
 * The value of @outlen after return is the number of octets produced.
2028
 */
2029
static int
2030
xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2031
1.95M
                  int *outlen, const unsigned char *in, int *inlen) {
2032
1.95M
    int ret;
2033
2034
1.95M
    if (handler->output != NULL) {
2035
1.68M
        ret = handler->output(out, outlen, in, inlen);
2036
1.68M
        if (ret > 0)
2037
1.12M
           ret = 0;
2038
1.68M
    }
2039
268k
#ifdef LIBXML_ICONV_ENABLED
2040
268k
    else if (handler->iconv_out != NULL) {
2041
268k
        ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
2042
268k
    }
2043
0
#endif /* LIBXML_ICONV_ENABLED */
2044
#ifdef LIBXML_ICU_ENABLED
2045
    else if (handler->uconv_out != NULL) {
2046
        ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen,
2047
                              1);
2048
    }
2049
#endif /* LIBXML_ICU_ENABLED */
2050
0
    else {
2051
0
        *outlen = 0;
2052
0
        *inlen = 0;
2053
0
        ret = -4;
2054
0
    }
2055
2056
1.95M
    return(ret);
2057
1.95M
}
2058
2059
/**
2060
 * xmlCharEncFirstLine:
2061
 * @handler:  char encoding transformation data structure
2062
 * @out:  an xmlBuffer for the output.
2063
 * @in:  an xmlBuffer for the input
2064
 *
2065
 * Front-end for the encoding handler input function, but handle only
2066
 * the very first line, i.e. limit itself to 45 chars.
2067
 *
2068
 * Returns the number of byte written if success, or
2069
 *     -1 general error
2070
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2071
 *        the result of transformation can't fit into the encoding we want), or
2072
 */
2073
int
2074
xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2075
0
                    xmlBufferPtr in) {
2076
0
    int ret;
2077
0
    int written;
2078
0
    int toconv;
2079
2080
0
    if (handler == NULL) return(-1);
2081
0
    if (out == NULL) return(-1);
2082
0
    if (in == NULL) return(-1);
2083
2084
    /* calculate space available */
2085
0
    written = out->size - out->use - 1; /* count '\0' */
2086
0
    toconv = in->use;
2087
    /*
2088
     * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2089
     * 45 chars should be sufficient to reach the end of the encoding
2090
     * declaration without going too far inside the document content.
2091
     * on UTF-16 this means 90bytes, on UCS4 this means 180
2092
     * The actual value depending on guessed encoding is passed as @len
2093
     * if provided
2094
     */
2095
0
    if (toconv > 180)
2096
0
        toconv = 180;
2097
0
    if (toconv * 2 >= written) {
2098
0
        xmlBufferGrow(out, toconv * 2);
2099
0
  written = out->size - out->use - 1;
2100
0
    }
2101
2102
0
    ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2103
0
                           in->content, &toconv, 0);
2104
0
    xmlBufferShrink(in, toconv);
2105
0
    out->use += written;
2106
0
    out->content[out->use] = 0;
2107
0
    if (ret == -1) ret = -3;
2108
2109
#ifdef DEBUG_ENCODING
2110
    switch (ret) {
2111
        case 0:
2112
      xmlGenericError(xmlGenericErrorContext,
2113
        "converted %d bytes to %d bytes of input\n",
2114
              toconv, written);
2115
      break;
2116
        case -1:
2117
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2118
              toconv, written, in->use);
2119
      break;
2120
        case -2:
2121
      xmlGenericError(xmlGenericErrorContext,
2122
        "input conversion failed due to input error\n");
2123
      break;
2124
        case -3:
2125
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2126
              toconv, written, in->use);
2127
      break;
2128
  default:
2129
      xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2130
    }
2131
#endif /* DEBUG_ENCODING */
2132
    /*
2133
     * Ignore when input buffer is not on a boundary
2134
     */
2135
0
    if (ret == -3) ret = 0;
2136
0
    if (ret == -1) ret = 0;
2137
0
    return(written ? written : ret);
2138
0
}
2139
2140
/**
2141
 * xmlCharEncFirstLineInput:
2142
 * @input: a parser input buffer
2143
 * @len:  number of bytes to convert for the first line, or -1
2144
 *
2145
 * Front-end for the encoding handler input function, but handle only
2146
 * the very first line. Point is that this is based on autodetection
2147
 * of the encoding and once that first line is converted we may find
2148
 * out that a different decoder is needed to process the input.
2149
 *
2150
 * Returns the number of byte written if success, or
2151
 *     -1 general error
2152
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2153
 *        the result of transformation can't fit into the encoding we want), or
2154
 */
2155
int
2156
xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
2157
100k
{
2158
100k
    int ret;
2159
100k
    size_t written;
2160
100k
    size_t toconv;
2161
100k
    int c_in;
2162
100k
    int c_out;
2163
100k
    xmlBufPtr in;
2164
100k
    xmlBufPtr out;
2165
2166
100k
    if ((input == NULL) || (input->encoder == NULL) ||
2167
100k
        (input->buffer == NULL) || (input->raw == NULL))
2168
0
        return (-1);
2169
100k
    out = input->buffer;
2170
100k
    in = input->raw;
2171
2172
100k
    toconv = xmlBufUse(in);
2173
100k
    if (toconv == 0)
2174
88
        return (0);
2175
100k
    written = xmlBufAvail(out);
2176
    /*
2177
     * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2178
     * 45 chars should be sufficient to reach the end of the encoding
2179
     * declaration without going too far inside the document content.
2180
     * on UTF-16 this means 90bytes, on UCS4 this means 180
2181
     * The actual value depending on guessed encoding is passed as @len
2182
     * if provided
2183
     */
2184
100k
    if (len >= 0) {
2185
21.1k
        if (toconv > (unsigned int) len)
2186
15.0k
            toconv = len;
2187
79.2k
    } else {
2188
79.2k
        if (toconv > 180)
2189
44.4k
            toconv = 180;
2190
79.2k
    }
2191
100k
    if (toconv * 2 >= written) {
2192
0
        xmlBufGrow(out, toconv * 2);
2193
0
        written = xmlBufAvail(out);
2194
0
    }
2195
100k
    if (written > 360)
2196
100k
        written = 360;
2197
2198
100k
    c_in = toconv;
2199
100k
    c_out = written;
2200
100k
    ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2201
100k
                           xmlBufContent(in), &c_in, 0);
2202
100k
    xmlBufShrink(in, c_in);
2203
100k
    xmlBufAddLen(out, c_out);
2204
100k
    if (ret == -1)
2205
2.58k
        ret = -3;
2206
2207
100k
    switch (ret) {
2208
89.9k
        case 0:
2209
#ifdef DEBUG_ENCODING
2210
            xmlGenericError(xmlGenericErrorContext,
2211
                            "converted %d bytes to %d bytes of input\n",
2212
                            c_in, c_out);
2213
#endif
2214
89.9k
            break;
2215
0
        case -1:
2216
#ifdef DEBUG_ENCODING
2217
            xmlGenericError(xmlGenericErrorContext,
2218
                         "converted %d bytes to %d bytes of input, %d left\n",
2219
                            c_in, c_out, (int)xmlBufUse(in));
2220
#endif
2221
0
            break;
2222
3.20k
        case -3:
2223
#ifdef DEBUG_ENCODING
2224
            xmlGenericError(xmlGenericErrorContext,
2225
                        "converted %d bytes to %d bytes of input, %d left\n",
2226
                            c_in, c_out, (int)xmlBufUse(in));
2227
#endif
2228
3.20k
            break;
2229
7.23k
        case -2: {
2230
7.23k
            char buf[50];
2231
7.23k
            const xmlChar *content = xmlBufContent(in);
2232
2233
7.23k
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2234
7.23k
         content[0], content[1],
2235
7.23k
         content[2], content[3]);
2236
7.23k
      buf[49] = 0;
2237
7.23k
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2238
7.23k
        "input conversion failed due to input error, bytes %s\n",
2239
7.23k
               buf);
2240
7.23k
        }
2241
100k
    }
2242
    /*
2243
     * Ignore when input buffer is not on a boundary
2244
     */
2245
100k
    if (ret == -3) ret = 0;
2246
100k
    if (ret == -1) ret = 0;
2247
100k
    return(c_out ? c_out : ret);
2248
100k
}
2249
2250
/**
2251
 * xmlCharEncInput:
2252
 * @input: a parser input buffer
2253
 * @flush: try to flush all the raw buffer
2254
 *
2255
 * Generic front-end for the encoding handler on parser input
2256
 *
2257
 * Returns the number of byte written if success, or
2258
 *     -1 general error
2259
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2260
 *        the result of transformation can't fit into the encoding we want), or
2261
 */
2262
int
2263
xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
2264
2.55M
{
2265
2.55M
    int ret;
2266
2.55M
    size_t written;
2267
2.55M
    size_t toconv;
2268
2.55M
    int c_in;
2269
2.55M
    int c_out;
2270
2.55M
    xmlBufPtr in;
2271
2.55M
    xmlBufPtr out;
2272
2273
2.55M
    if ((input == NULL) || (input->encoder == NULL) ||
2274
2.55M
        (input->buffer == NULL) || (input->raw == NULL))
2275
0
        return (-1);
2276
2.55M
    out = input->buffer;
2277
2.55M
    in = input->raw;
2278
2279
2.55M
    toconv = xmlBufUse(in);
2280
2.55M
    if (toconv == 0)
2281
509k
        return (0);
2282
2.04M
    if ((toconv > 64 * 1024) && (flush == 0))
2283
1
        toconv = 64 * 1024;
2284
2.04M
    written = xmlBufAvail(out);
2285
2.04M
    if (toconv * 2 >= written) {
2286
70.1k
        xmlBufGrow(out, toconv * 2);
2287
70.1k
        written = xmlBufAvail(out);
2288
70.1k
    }
2289
2.04M
    if ((written > 128 * 1024) && (flush == 0))
2290
6
        written = 128 * 1024;
2291
2292
2.04M
    c_in = toconv;
2293
2.04M
    c_out = written;
2294
2.04M
    ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2295
2.04M
                           xmlBufContent(in), &c_in, flush);
2296
2.04M
    xmlBufShrink(in, c_in);
2297
2.04M
    xmlBufAddLen(out, c_out);
2298
2.04M
    if (ret == -1)
2299
289k
        ret = -3;
2300
2301
2.04M
    switch (ret) {
2302
1.72M
        case 0:
2303
#ifdef DEBUG_ENCODING
2304
            xmlGenericError(xmlGenericErrorContext,
2305
                            "converted %d bytes to %d bytes of input\n",
2306
                            c_in, c_out);
2307
#endif
2308
1.72M
            break;
2309
0
        case -1:
2310
#ifdef DEBUG_ENCODING
2311
            xmlGenericError(xmlGenericErrorContext,
2312
                         "converted %d bytes to %d bytes of input, %d left\n",
2313
                            c_in, c_out, (int)xmlBufUse(in));
2314
#endif
2315
0
            break;
2316
293k
        case -3:
2317
#ifdef DEBUG_ENCODING
2318
            xmlGenericError(xmlGenericErrorContext,
2319
                        "converted %d bytes to %d bytes of input, %d left\n",
2320
                            c_in, c_out, (int)xmlBufUse(in));
2321
#endif
2322
293k
            break;
2323
25.4k
        case -2: {
2324
25.4k
            char buf[50];
2325
25.4k
            const xmlChar *content = xmlBufContent(in);
2326
2327
25.4k
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2328
25.4k
         content[0], content[1],
2329
25.4k
         content[2], content[3]);
2330
25.4k
      buf[49] = 0;
2331
25.4k
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2332
25.4k
        "input conversion failed due to input error, bytes %s\n",
2333
25.4k
               buf);
2334
25.4k
        }
2335
2.04M
    }
2336
    /*
2337
     * Ignore when input buffer is not on a boundary
2338
     */
2339
2.04M
    if (ret == -3)
2340
293k
        ret = 0;
2341
2.04M
    return (c_out? c_out : ret);
2342
2.04M
}
2343
2344
/**
2345
 * xmlCharEncInFunc:
2346
 * @handler:  char encoding transformation data structure
2347
 * @out:  an xmlBuffer for the output.
2348
 * @in:  an xmlBuffer for the input
2349
 *
2350
 * Generic front-end for the encoding handler input function
2351
 *
2352
 * Returns the number of byte written if success, or
2353
 *     -1 general error
2354
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2355
 *        the result of transformation can't fit into the encoding we want), or
2356
 */
2357
int
2358
xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2359
                 xmlBufferPtr in)
2360
0
{
2361
0
    int ret;
2362
0
    int written;
2363
0
    int toconv;
2364
2365
0
    if (handler == NULL)
2366
0
        return (-1);
2367
0
    if (out == NULL)
2368
0
        return (-1);
2369
0
    if (in == NULL)
2370
0
        return (-1);
2371
2372
0
    toconv = in->use;
2373
0
    if (toconv == 0)
2374
0
        return (0);
2375
0
    written = out->size - out->use -1; /* count '\0' */
2376
0
    if (toconv * 2 >= written) {
2377
0
        xmlBufferGrow(out, out->size + toconv * 2);
2378
0
        written = out->size - out->use - 1;
2379
0
    }
2380
0
    ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2381
0
                           in->content, &toconv, 1);
2382
0
    xmlBufferShrink(in, toconv);
2383
0
    out->use += written;
2384
0
    out->content[out->use] = 0;
2385
0
    if (ret == -1)
2386
0
        ret = -3;
2387
2388
0
    switch (ret) {
2389
0
        case 0:
2390
#ifdef DEBUG_ENCODING
2391
            xmlGenericError(xmlGenericErrorContext,
2392
                            "converted %d bytes to %d bytes of input\n",
2393
                            toconv, written);
2394
#endif
2395
0
            break;
2396
0
        case -1:
2397
#ifdef DEBUG_ENCODING
2398
            xmlGenericError(xmlGenericErrorContext,
2399
                         "converted %d bytes to %d bytes of input, %d left\n",
2400
                            toconv, written, in->use);
2401
#endif
2402
0
            break;
2403
0
        case -3:
2404
#ifdef DEBUG_ENCODING
2405
            xmlGenericError(xmlGenericErrorContext,
2406
                        "converted %d bytes to %d bytes of input, %d left\n",
2407
                            toconv, written, in->use);
2408
#endif
2409
0
            break;
2410
0
        case -2: {
2411
0
            char buf[50];
2412
2413
0
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2414
0
         in->content[0], in->content[1],
2415
0
         in->content[2], in->content[3]);
2416
0
      buf[49] = 0;
2417
0
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2418
0
        "input conversion failed due to input error, bytes %s\n",
2419
0
               buf);
2420
0
        }
2421
0
    }
2422
    /*
2423
     * Ignore when input buffer is not on a boundary
2424
     */
2425
0
    if (ret == -3)
2426
0
        ret = 0;
2427
0
    return (written? written : ret);
2428
0
}
2429
2430
#ifdef LIBXML_OUTPUT_ENABLED
2431
/**
2432
 * xmlCharEncOutput:
2433
 * @output: a parser output buffer
2434
 * @init: is this an initialization call without data
2435
 *
2436
 * Generic front-end for the encoding handler on parser output
2437
 * a first call with @init == 1 has to be made first to initiate the
2438
 * output in case of non-stateless encoding needing to initiate their
2439
 * state or the output (like the BOM in UTF16).
2440
 * In case of UTF8 sequence conversion errors for the given encoder,
2441
 * the content will be automatically remapped to a CharRef sequence.
2442
 *
2443
 * Returns the number of byte written if success, or
2444
 *     -1 general error
2445
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2446
 *        the result of transformation can't fit into the encoding we want), or
2447
 */
2448
int
2449
xmlCharEncOutput(xmlOutputBufferPtr output, int init)
2450
698k
{
2451
698k
    int ret;
2452
698k
    size_t written;
2453
698k
    int writtentot = 0;
2454
698k
    size_t toconv;
2455
698k
    int c_in;
2456
698k
    int c_out;
2457
698k
    xmlBufPtr in;
2458
698k
    xmlBufPtr out;
2459
2460
698k
    if ((output == NULL) || (output->encoder == NULL) ||
2461
698k
        (output->buffer == NULL) || (output->conv == NULL))
2462
0
        return (-1);
2463
698k
    out = output->conv;
2464
698k
    in = output->buffer;
2465
2466
1.33M
retry:
2467
2468
1.33M
    written = xmlBufAvail(out);
2469
2470
    /*
2471
     * First specific handling of the initialization call
2472
     */
2473
1.33M
    if (init) {
2474
19.6k
        c_in = 0;
2475
19.6k
        c_out = written;
2476
        /* TODO: Check return value. */
2477
19.6k
        xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2478
19.6k
                          NULL, &c_in);
2479
19.6k
        xmlBufAddLen(out, c_out);
2480
#ifdef DEBUG_ENCODING
2481
  xmlGenericError(xmlGenericErrorContext,
2482
    "initialized encoder\n");
2483
#endif
2484
19.6k
        return(c_out);
2485
19.6k
    }
2486
2487
    /*
2488
     * Conversion itself.
2489
     */
2490
1.31M
    toconv = xmlBufUse(in);
2491
1.31M
    if (toconv == 0)
2492
18.2k
        return (writtentot);
2493
1.29M
    if (toconv > 64 * 1024)
2494
1
        toconv = 64 * 1024;
2495
1.29M
    if (toconv * 4 >= written) {
2496
19.4k
        xmlBufGrow(out, toconv * 4);
2497
19.4k
        written = xmlBufAvail(out);
2498
19.4k
    }
2499
1.29M
    if (written > 256 * 1024)
2500
575k
        written = 256 * 1024;
2501
2502
1.29M
    c_in = toconv;
2503
1.29M
    c_out = written;
2504
1.29M
    ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2505
1.29M
                            xmlBufContent(in), &c_in);
2506
1.29M
    xmlBufShrink(in, c_in);
2507
1.29M
    xmlBufAddLen(out, c_out);
2508
1.29M
    writtentot += c_out;
2509
1.29M
    if (ret == -1) {
2510
0
        if (c_out > 0) {
2511
            /* Can be a limitation of iconv or uconv */
2512
0
            goto retry;
2513
0
        }
2514
0
        ret = -3;
2515
0
    }
2516
2517
    /*
2518
     * Attempt to handle error cases
2519
     */
2520
1.29M
    switch (ret) {
2521
656k
        case 0:
2522
#ifdef DEBUG_ENCODING
2523
      xmlGenericError(xmlGenericErrorContext,
2524
        "converted %d bytes to %d bytes of output\n",
2525
              c_in, c_out);
2526
#endif
2527
656k
      break;
2528
0
        case -1:
2529
#ifdef DEBUG_ENCODING
2530
      xmlGenericError(xmlGenericErrorContext,
2531
        "output conversion failed by lack of space\n");
2532
#endif
2533
0
      break;
2534
1.22k
        case -3:
2535
#ifdef DEBUG_ENCODING
2536
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2537
              c_in, c_out, (int) xmlBufUse(in));
2538
#endif
2539
1.22k
      break;
2540
0
        case -4:
2541
0
            xmlEncodingErr(XML_I18N_NO_OUTPUT,
2542
0
                           "xmlCharEncOutFunc: no output function !\n", NULL);
2543
0
            ret = -1;
2544
0
            break;
2545
641k
        case -2: {
2546
641k
      xmlChar charref[20];
2547
641k
      int len = xmlBufUse(in);
2548
641k
            xmlChar *content = xmlBufContent(in);
2549
641k
      int cur, charrefLen;
2550
2551
641k
      cur = xmlGetUTF8Char(content, &len);
2552
641k
      if (cur <= 0)
2553
2.77k
                break;
2554
2555
#ifdef DEBUG_ENCODING
2556
            xmlGenericError(xmlGenericErrorContext,
2557
                    "handling output conversion error\n");
2558
            xmlGenericError(xmlGenericErrorContext,
2559
                    "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2560
                    content[0], content[1],
2561
                    content[2], content[3]);
2562
#endif
2563
            /*
2564
             * Removes the UTF8 sequence, and replace it by a charref
2565
             * and continue the transcoding phase, hoping the error
2566
             * did not mangle the encoder state.
2567
             */
2568
638k
            charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2569
638k
                             "&#%d;", cur);
2570
638k
            xmlBufShrink(in, len);
2571
638k
            xmlBufGrow(out, charrefLen * 4);
2572
638k
            c_out = xmlBufAvail(out);
2573
638k
            c_in = charrefLen;
2574
638k
            ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2575
638k
                                    charref, &c_in);
2576
2577
638k
      if ((ret < 0) || (c_in != charrefLen)) {
2578
0
    char buf[50];
2579
2580
0
    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2581
0
       content[0], content[1],
2582
0
       content[2], content[3]);
2583
0
    buf[49] = 0;
2584
0
    xmlEncodingErr(XML_I18N_CONV_FAILED,
2585
0
        "output conversion failed due to conv error, bytes %s\n",
2586
0
             buf);
2587
0
    if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE)
2588
0
        content[0] = ' ';
2589
0
                break;
2590
0
      }
2591
2592
638k
            xmlBufAddLen(out, c_out);
2593
638k
            writtentot += c_out;
2594
638k
            goto retry;
2595
638k
  }
2596
1.29M
    }
2597
660k
    return(writtentot ? writtentot : ret);
2598
1.29M
}
2599
#endif
2600
2601
/**
2602
 * xmlCharEncOutFunc:
2603
 * @handler:  char encoding transformation data structure
2604
 * @out:  an xmlBuffer for the output.
2605
 * @in:  an xmlBuffer for the input
2606
 *
2607
 * Generic front-end for the encoding handler output function
2608
 * a first call with @in == NULL has to be made firs to initiate the
2609
 * output in case of non-stateless encoding needing to initiate their
2610
 * state or the output (like the BOM in UTF16).
2611
 * In case of UTF8 sequence conversion errors for the given encoder,
2612
 * the content will be automatically remapped to a CharRef sequence.
2613
 *
2614
 * Returns the number of byte written if success, or
2615
 *     -1 general error
2616
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2617
 *        the result of transformation can't fit into the encoding we want), or
2618
 */
2619
int
2620
xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2621
0
                  xmlBufferPtr in) {
2622
0
    int ret;
2623
0
    int written;
2624
0
    int writtentot = 0;
2625
0
    int toconv;
2626
2627
0
    if (handler == NULL) return(-1);
2628
0
    if (out == NULL) return(-1);
2629
2630
0
retry:
2631
2632
0
    written = out->size - out->use;
2633
2634
0
    if (written > 0)
2635
0
  written--; /* Gennady: count '/0' */
2636
2637
    /*
2638
     * First specific handling of in = NULL, i.e. the initialization call
2639
     */
2640
0
    if (in == NULL) {
2641
0
        toconv = 0;
2642
        /* TODO: Check return value. */
2643
0
        xmlEncOutputChunk(handler, &out->content[out->use], &written,
2644
0
                          NULL, &toconv);
2645
0
        out->use += written;
2646
0
        out->content[out->use] = 0;
2647
#ifdef DEBUG_ENCODING
2648
  xmlGenericError(xmlGenericErrorContext,
2649
    "initialized encoder\n");
2650
#endif
2651
0
        return(0);
2652
0
    }
2653
2654
    /*
2655
     * Conversion itself.
2656
     */
2657
0
    toconv = in->use;
2658
0
    if (toconv == 0)
2659
0
  return(0);
2660
0
    if (toconv * 4 >= written) {
2661
0
        xmlBufferGrow(out, toconv * 4);
2662
0
  written = out->size - out->use - 1;
2663
0
    }
2664
0
    ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2665
0
                            in->content, &toconv);
2666
0
    xmlBufferShrink(in, toconv);
2667
0
    out->use += written;
2668
0
    writtentot += written;
2669
0
    out->content[out->use] = 0;
2670
0
    if (ret == -1) {
2671
0
        if (written > 0) {
2672
            /* Can be a limitation of iconv or uconv */
2673
0
            goto retry;
2674
0
        }
2675
0
        ret = -3;
2676
0
    }
2677
2678
    /*
2679
     * Attempt to handle error cases
2680
     */
2681
0
    switch (ret) {
2682
0
        case 0:
2683
#ifdef DEBUG_ENCODING
2684
      xmlGenericError(xmlGenericErrorContext,
2685
        "converted %d bytes to %d bytes of output\n",
2686
              toconv, written);
2687
#endif
2688
0
      break;
2689
0
        case -1:
2690
#ifdef DEBUG_ENCODING
2691
      xmlGenericError(xmlGenericErrorContext,
2692
        "output conversion failed by lack of space\n");
2693
#endif
2694
0
      break;
2695
0
        case -3:
2696
#ifdef DEBUG_ENCODING
2697
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2698
              toconv, written, in->use);
2699
#endif
2700
0
      break;
2701
0
        case -4:
2702
0
      xmlEncodingErr(XML_I18N_NO_OUTPUT,
2703
0
               "xmlCharEncOutFunc: no output function !\n", NULL);
2704
0
      ret = -1;
2705
0
            break;
2706
0
        case -2: {
2707
0
      xmlChar charref[20];
2708
0
      int len = in->use;
2709
0
      const xmlChar *utf = (const xmlChar *) in->content;
2710
0
      int cur, charrefLen;
2711
2712
0
      cur = xmlGetUTF8Char(utf, &len);
2713
0
      if (cur <= 0)
2714
0
                break;
2715
2716
#ifdef DEBUG_ENCODING
2717
            xmlGenericError(xmlGenericErrorContext,
2718
                    "handling output conversion error\n");
2719
            xmlGenericError(xmlGenericErrorContext,
2720
                    "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2721
                    in->content[0], in->content[1],
2722
                    in->content[2], in->content[3]);
2723
#endif
2724
            /*
2725
             * Removes the UTF8 sequence, and replace it by a charref
2726
             * and continue the transcoding phase, hoping the error
2727
             * did not mangle the encoder state.
2728
             */
2729
0
            charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2730
0
                             "&#%d;", cur);
2731
0
            xmlBufferShrink(in, len);
2732
0
            xmlBufferGrow(out, charrefLen * 4);
2733
0
      written = out->size - out->use - 1;
2734
0
            toconv = charrefLen;
2735
0
            ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2736
0
                                    charref, &toconv);
2737
2738
0
      if ((ret < 0) || (toconv != charrefLen)) {
2739
0
    char buf[50];
2740
2741
0
    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2742
0
       in->content[0], in->content[1],
2743
0
       in->content[2], in->content[3]);
2744
0
    buf[49] = 0;
2745
0
    xmlEncodingErr(XML_I18N_CONV_FAILED,
2746
0
        "output conversion failed due to conv error, bytes %s\n",
2747
0
             buf);
2748
0
    if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2749
0
        in->content[0] = ' ';
2750
0
          break;
2751
0
      }
2752
2753
0
            out->use += written;
2754
0
            writtentot += written;
2755
0
            out->content[out->use] = 0;
2756
0
            goto retry;
2757
0
  }
2758
0
    }
2759
0
    return(writtentot ? writtentot : ret);
2760
0
}
2761
2762
/**
2763
 * xmlCharEncCloseFunc:
2764
 * @handler:  char encoding transformation data structure
2765
 *
2766
 * Generic front-end for encoding handler close function
2767
 *
2768
 * Returns 0 if success, or -1 in case of error
2769
 */
2770
int
2771
136k
xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2772
136k
    int ret = 0;
2773
136k
    int tofree = 0;
2774
136k
    int i, handler_in_list = 0;
2775
2776
    /* Avoid unused variable warning if features are disabled. */
2777
136k
    (void) handler_in_list;
2778
2779
136k
    if (handler == NULL) return(-1);
2780
136k
    if (handler->name == NULL) return(-1);
2781
136k
    if (handlers != NULL) {
2782
802k
        for (i = 0;i < nbCharEncodingHandler; i++) {
2783
758k
            if (handler == handlers[i]) {
2784
92.2k
          handler_in_list = 1;
2785
92.2k
    break;
2786
92.2k
      }
2787
758k
  }
2788
136k
    }
2789
136k
#ifdef LIBXML_ICONV_ENABLED
2790
    /*
2791
     * Iconv handlers can be used only once, free the whole block.
2792
     * and the associated icon resources.
2793
     */
2794
136k
    if ((handler_in_list == 0) &&
2795
136k
        ((handler->iconv_out != NULL) || (handler->iconv_in != NULL))) {
2796
44.4k
        tofree = 1;
2797
44.4k
  if (handler->iconv_out != NULL) {
2798
44.4k
      if (iconv_close(handler->iconv_out))
2799
0
    ret = -1;
2800
44.4k
      handler->iconv_out = NULL;
2801
44.4k
  }
2802
44.4k
  if (handler->iconv_in != NULL) {
2803
44.4k
      if (iconv_close(handler->iconv_in))
2804
0
    ret = -1;
2805
44.4k
      handler->iconv_in = NULL;
2806
44.4k
  }
2807
44.4k
    }
2808
136k
#endif /* LIBXML_ICONV_ENABLED */
2809
#ifdef LIBXML_ICU_ENABLED
2810
    if ((handler_in_list == 0) &&
2811
        ((handler->uconv_out != NULL) || (handler->uconv_in != NULL))) {
2812
        tofree = 1;
2813
  if (handler->uconv_out != NULL) {
2814
      closeIcuConverter(handler->uconv_out);
2815
      handler->uconv_out = NULL;
2816
  }
2817
  if (handler->uconv_in != NULL) {
2818
      closeIcuConverter(handler->uconv_in);
2819
      handler->uconv_in = NULL;
2820
  }
2821
    }
2822
#endif
2823
136k
    if (tofree) {
2824
        /* free up only dynamic handlers iconv/uconv */
2825
44.4k
        if (handler->name != NULL)
2826
44.4k
            xmlFree(handler->name);
2827
44.4k
        handler->name = NULL;
2828
44.4k
        xmlFree(handler);
2829
44.4k
    }
2830
#ifdef DEBUG_ENCODING
2831
    if (ret)
2832
        xmlGenericError(xmlGenericErrorContext,
2833
    "failed to close the encoding handler\n");
2834
    else
2835
        xmlGenericError(xmlGenericErrorContext,
2836
    "closed the encoding handler\n");
2837
#endif
2838
2839
136k
    return(ret);
2840
136k
}
2841
2842
/**
2843
 * xmlByteConsumed:
2844
 * @ctxt: an XML parser context
2845
 *
2846
 * This function provides the current index of the parser relative
2847
 * to the start of the current entity. This function is computed in
2848
 * bytes from the beginning starting at zero and finishing at the
2849
 * size in byte of the file if parsing a file. The function is
2850
 * of constant cost if the input is UTF-8 but can be costly if run
2851
 * on non-UTF-8 input.
2852
 *
2853
 * Returns the index in bytes from the beginning of the entity or -1
2854
 *         in case the index could not be computed.
2855
 */
2856
long
2857
0
xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2858
0
    xmlParserInputPtr in;
2859
2860
0
    if (ctxt == NULL) return(-1);
2861
0
    in = ctxt->input;
2862
0
    if (in == NULL)  return(-1);
2863
0
    if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2864
0
        unsigned int unused = 0;
2865
0
  xmlCharEncodingHandler * handler = in->buf->encoder;
2866
        /*
2867
   * Encoding conversion, compute the number of unused original
2868
   * bytes from the input not consumed and subtract that from
2869
   * the raw consumed value, this is not a cheap operation
2870
   */
2871
0
        if (in->end - in->cur > 0) {
2872
0
      unsigned char convbuf[32000];
2873
0
      const unsigned char *cur = (const unsigned char *)in->cur;
2874
0
      int toconv = in->end - in->cur, written = 32000;
2875
2876
0
      int ret;
2877
2878
0
            do {
2879
0
                toconv = in->end - cur;
2880
0
                written = 32000;
2881
0
                ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2882
0
                                        cur, &toconv);
2883
0
                if (ret < 0) {
2884
0
                    if (written > 0)
2885
0
                        ret = -2;
2886
0
                    else
2887
0
                        return(-1);
2888
0
                }
2889
0
                unused += written;
2890
0
                cur += toconv;
2891
0
            } while (ret == -2);
2892
0
  }
2893
0
  if (in->buf->rawconsumed < unused)
2894
0
      return(-1);
2895
0
  return(in->buf->rawconsumed - unused);
2896
0
    }
2897
0
    return(in->consumed + (in->cur - in->base));
2898
0
}
2899
2900
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2901
#ifdef LIBXML_ISO8859X_ENABLED
2902
2903
/**
2904
 * UTF8ToISO8859x:
2905
 * @out:  a pointer to an array of bytes to store the result
2906
 * @outlen:  the length of @out
2907
 * @in:  a pointer to an array of UTF-8 chars
2908
 * @inlen:  the length of @in
2909
 * @xlattable: the 2-level transcoding table
2910
 *
2911
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
2912
 * block of chars out.
2913
 *
2914
 * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
2915
 * The value of @inlen after return is the number of octets consumed
2916
 *     as the return value is positive, else unpredictable.
2917
 * The value of @outlen after return is the number of octets consumed.
2918
 */
2919
static int
2920
UTF8ToISO8859x(unsigned char* out, int *outlen,
2921
              const unsigned char* in, int *inlen,
2922
              const unsigned char* const xlattable) {
2923
    const unsigned char* outstart = out;
2924
    const unsigned char* inend;
2925
    const unsigned char* instart = in;
2926
    const unsigned char* processed = in;
2927
2928
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
2929
        (xlattable == NULL))
2930
  return(-1);
2931
    if (in == NULL) {
2932
        /*
2933
        * initialization nothing to do
2934
        */
2935
        *outlen = 0;
2936
        *inlen = 0;
2937
        return(0);
2938
    }
2939
    inend = in + (*inlen);
2940
    while (in < inend) {
2941
        unsigned char d = *in++;
2942
        if  (d < 0x80)  {
2943
            *out++ = d;
2944
        } else if (d < 0xC0) {
2945
            /* trailing byte in leading position */
2946
            *outlen = out - outstart;
2947
            *inlen = processed - instart;
2948
            return(-2);
2949
        } else if (d < 0xE0) {
2950
            unsigned char c;
2951
            if (!(in < inend)) {
2952
                /* trailing byte not in input buffer */
2953
                *outlen = out - outstart;
2954
                *inlen = processed - instart;
2955
                return(-3);
2956
            }
2957
            c = *in++;
2958
            if ((c & 0xC0) != 0x80) {
2959
                /* not a trailing byte */
2960
                *outlen = out - outstart;
2961
                *inlen = processed - instart;
2962
                return(-2);
2963
            }
2964
            c = c & 0x3F;
2965
            d = d & 0x1F;
2966
            d = xlattable [48 + c + xlattable [d] * 64];
2967
            if (d == 0) {
2968
                /* not in character set */
2969
                *outlen = out - outstart;
2970
                *inlen = processed - instart;
2971
                return(-2);
2972
            }
2973
            *out++ = d;
2974
        } else if (d < 0xF0) {
2975
            unsigned char c1;
2976
            unsigned char c2;
2977
            if (!(in < inend - 1)) {
2978
                /* trailing bytes not in input buffer */
2979
                *outlen = out - outstart;
2980
                *inlen = processed - instart;
2981
                return(-3);
2982
            }
2983
            c1 = *in++;
2984
            if ((c1 & 0xC0) != 0x80) {
2985
                /* not a trailing byte (c1) */
2986
                *outlen = out - outstart;
2987
                *inlen = processed - instart;
2988
                return(-2);
2989
            }
2990
            c2 = *in++;
2991
            if ((c2 & 0xC0) != 0x80) {
2992
                /* not a trailing byte (c2) */
2993
                *outlen = out - outstart;
2994
                *inlen = processed - instart;
2995
                return(-2);
2996
            }
2997
            c1 = c1 & 0x3F;
2998
            c2 = c2 & 0x3F;
2999
      d = d & 0x0F;
3000
      d = xlattable [48 + c2 + xlattable [48 + c1 +
3001
      xlattable [32 + d] * 64] * 64];
3002
            if (d == 0) {
3003
                /* not in character set */
3004
                *outlen = out - outstart;
3005
                *inlen = processed - instart;
3006
                return(-2);
3007
            }
3008
            *out++ = d;
3009
        } else {
3010
            /* cannot transcode >= U+010000 */
3011
            *outlen = out - outstart;
3012
            *inlen = processed - instart;
3013
            return(-2);
3014
        }
3015
        processed = in;
3016
    }
3017
    *outlen = out - outstart;
3018
    *inlen = processed - instart;
3019
    return(*outlen);
3020
}
3021
3022
/**
3023
 * ISO8859xToUTF8
3024
 * @out:  a pointer to an array of bytes to store the result
3025
 * @outlen:  the length of @out
3026
 * @in:  a pointer to an array of ISO Latin 1 chars
3027
 * @inlen:  the length of @in
3028
 *
3029
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
3030
 * block of chars out.
3031
 * Returns 0 if success, or -1 otherwise
3032
 * The value of @inlen after return is the number of octets consumed
3033
 * The value of @outlen after return is the number of octets produced.
3034
 */
3035
static int
3036
ISO8859xToUTF8(unsigned char* out, int *outlen,
3037
              const unsigned char* in, int *inlen,
3038
              unsigned short const *unicodetable) {
3039
    unsigned char* outstart = out;
3040
    unsigned char* outend;
3041
    const unsigned char* instart = in;
3042
    const unsigned char* inend;
3043
    const unsigned char* instop;
3044
    unsigned int c;
3045
3046
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
3047
        (in == NULL) || (unicodetable == NULL))
3048
  return(-1);
3049
    outend = out + *outlen;
3050
    inend = in + *inlen;
3051
    instop = inend;
3052
3053
    while ((in < inend) && (out < outend - 2)) {
3054
        if (*in >= 0x80) {
3055
            c = unicodetable [*in - 0x80];
3056
            if (c == 0) {
3057
                /* undefined code point */
3058
                *outlen = out - outstart;
3059
                *inlen = in - instart;
3060
                return (-1);
3061
            }
3062
            if (c < 0x800) {
3063
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
3064
                *out++ = (c & 0x3F) | 0x80;
3065
            } else {
3066
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
3067
                *out++ = ((c >>  6) & 0x3F) | 0x80;
3068
                *out++ = (c & 0x3F) | 0x80;
3069
            }
3070
            ++in;
3071
        }
3072
        if (instop - in > outend - out) instop = in + (outend - out);
3073
        while ((*in < 0x80) && (in < instop)) {
3074
            *out++ = *in++;
3075
        }
3076
    }
3077
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
3078
        *out++ =  *in++;
3079
    }
3080
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
3081
        *out++ =  *in++;
3082
    }
3083
    *outlen = out - outstart;
3084
    *inlen = in - instart;
3085
    return (*outlen);
3086
}
3087
3088
3089
/************************************************************************
3090
 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
3091
 ************************************************************************/
3092
3093
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
3094
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3095
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3096
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3097
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3098
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
3099
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
3100
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
3101
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
3102
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
3103
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
3104
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
3105
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
3106
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
3107
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
3108
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
3109
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
3110
};
3111
3112
static const unsigned char xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
3113
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3114
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3115
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3116
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3117
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3118
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3119
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3120
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3121
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3122
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3123
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3124
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
3125
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
3126
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3127
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
3128
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3129
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
3130
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3131
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3132
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
3133
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
3134
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
3135
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
3136
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3137
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
3138
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
3139
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
3140
};
3141
3142
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
3143
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3144
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3145
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3146
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3147
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
3148
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
3149
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
3150
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
3151
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
3152
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3153
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
3154
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
3155
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
3156
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3157
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
3158
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
3159
};
3160
3161
static const unsigned char xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
3162
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3163
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3164
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3165
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3166
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3167
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3168
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3169
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3170
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3171
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3172
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
3173
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
3174
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
3175
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
3176
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3177
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3178
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
3179
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3180
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3181
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3182
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3183
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3184
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3185
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3186
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
3187
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
3188
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
3189
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3190
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3191
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3192
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
3193
};
3194
3195
static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
3196
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3197
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3198
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3199
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3200
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
3201
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
3202
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
3203
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
3204
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3205
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
3206
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3207
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
3208
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3209
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
3210
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3211
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
3212
};
3213
3214
static const unsigned char xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
3215
    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
3216
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3217
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3218
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3219
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3220
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3221
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3222
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3223
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3224
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
3225
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3226
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3227
    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3228
    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
3229
    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
3230
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
3231
    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
3232
    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
3233
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
3234
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3235
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
3236
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3237
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3238
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3239
    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
3240
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
3241
    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
3242
};
3243
3244
static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
3245
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3246
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3247
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3248
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3249
    0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
3250
    0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
3251
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
3252
    0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
3253
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
3254
    0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
3255
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
3256
    0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
3257
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
3258
    0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
3259
    0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
3260
    0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
3261
};
3262
3263
static const unsigned char xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
3264
    "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3265
    "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3266
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3267
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3268
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3269
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3270
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3271
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3272
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3273
    "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
3274
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3275
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
3276
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3277
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3278
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3279
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3280
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
3281
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3282
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3283
    "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3284
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3285
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3286
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3287
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3288
    "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3289
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3290
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3291
};
3292
3293
static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
3294
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3295
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3296
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3297
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3298
    0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
3299
    0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
3300
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3301
    0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
3302
    0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
3303
    0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3304
    0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3305
    0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3306
    0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3307
    0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3308
    0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3309
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3310
};
3311
3312
static const unsigned char xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
3313
    "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3314
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3315
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3316
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3317
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3318
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3319
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3320
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3321
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3322
    "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3323
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3324
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3325
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3326
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3327
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3328
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3329
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3330
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3331
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3332
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3333
    "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3334
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3335
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3336
};
3337
3338
static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
3339
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3340
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3341
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3342
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3343
    0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3344
    0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3345
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3346
    0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3347
    0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3348
    0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3349
    0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3350
    0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3351
    0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3352
    0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3353
    0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3354
    0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3355
};
3356
3357
static const unsigned char xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3358
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3359
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3360
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3361
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3362
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3363
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3364
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3365
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3366
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3367
    "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3368
    "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3369
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3370
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3371
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3372
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3373
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3374
    "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3375
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3376
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3377
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3378
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3379
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3380
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3381
    "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3382
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3383
    "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3384
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3385
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3386
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3387
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3388
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3389
};
3390
3391
static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
3392
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3393
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3394
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3395
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3396
    0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3397
    0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3398
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3399
    0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3400
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3401
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3402
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3403
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3404
    0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3405
    0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3406
    0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3407
    0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3408
};
3409
3410
static const unsigned char xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3411
    "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3412
    "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3413
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3414
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3415
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3416
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3417
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3418
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3419
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3420
    "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3421
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3422
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3423
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3424
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3425
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3426
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3427
    "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3428
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3429
    "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3430
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3431
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3432
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3433
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3434
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3435
    "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3436
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3437
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3438
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3439
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3440
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3441
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3442
};
3443
3444
static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
3445
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3446
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3447
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3448
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3449
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3450
    0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3451
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3452
    0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3453
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3454
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3455
    0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3456
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3457
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3458
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3459
    0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3460
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3461
};
3462
3463
static const unsigned char xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3464
    "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3465
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3466
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3467
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3468
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3469
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3470
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3471
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3472
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3473
    "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3474
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3475
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3476
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3477
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3478
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3479
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3480
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3481
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3482
    "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3483
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3484
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3485
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3486
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3487
};
3488
3489
static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3490
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3491
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3492
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3493
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3494
    0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3495
    0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3496
    0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3497
    0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3498
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3499
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3500
    0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3501
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3502
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3503
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3504
    0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3505
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3506
};
3507
3508
static const unsigned char xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3509
    "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3510
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3511
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3512
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3513
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3514
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3515
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3516
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3517
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3518
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3519
    "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3520
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3521
    "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3522
    "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3523
    "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3524
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3525
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3526
    "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3527
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3528
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3529
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3530
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3531
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3532
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3533
    "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3534
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3535
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3536
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3537
    "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3538
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3539
    "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3540
};
3541
3542
static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3543
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3544
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3545
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3546
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3547
    0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3548
    0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3549
    0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3550
    0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3551
    0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3552
    0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3553
    0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3554
    0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3555
    0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3556
    0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3557
    0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3558
    0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3559
};
3560
3561
static const unsigned char xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3562
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3563
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3564
    "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3565
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3566
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3567
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3568
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3569
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3570
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3571
    "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3572
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3573
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3574
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3575
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3576
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3577
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3578
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3579
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3580
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3581
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3582
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3583
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3584
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3585
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3586
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3587
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3588
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3589
};
3590
3591
static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3592
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3593
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3594
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3595
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3596
    0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3597
    0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3598
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3599
    0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3600
    0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3601
    0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3602
    0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3603
    0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3604
    0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3605
    0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3606
    0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3607
    0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3608
};
3609
3610
static const unsigned char xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3611
    "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3612
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3613
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3614
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3615
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3616
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3617
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3618
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3619
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3620
    "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3621
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3622
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3623
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3624
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3625
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3626
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3627
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3628
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3629
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3630
    "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3631
    "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3632
    "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3633
    "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3634
    "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3635
    "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3636
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3637
    "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3638
    "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3639
    "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3640
    "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3641
    "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3642
};
3643
3644
static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
3645
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3646
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3647
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3648
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3649
    0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3650
    0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3651
    0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3652
    0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3653
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3654
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3655
    0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3656
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3657
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3658
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3659
    0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3660
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
3661
};
3662
3663
static const unsigned char xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
3664
    "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3665
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3666
    "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3667
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3668
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3669
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3670
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3671
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3672
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3673
    "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3674
    "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3675
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3676
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3677
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3678
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3679
    "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3680
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3681
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3682
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3683
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3684
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3685
    "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3686
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3687
    "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3688
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3689
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3690
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3691
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3692
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3693
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3694
    "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3695
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3696
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3697
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3698
    "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3699
    "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3700
    "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3701
    "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3702
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3703
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3704
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3705
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3706
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3707
};
3708
3709
static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
3710
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3711
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3712
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3713
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3714
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3715
    0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3716
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3717
    0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3718
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3719
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3720
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3721
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3722
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3723
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3724
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3725
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
3726
};
3727
3728
static const unsigned char xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
3729
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3730
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3731
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3732
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3733
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3734
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3735
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3736
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3737
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3738
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3739
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3740
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3741
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3742
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3743
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3744
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3745
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3746
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3747
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3748
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3749
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3750
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3751
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3752
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3753
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3754
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3755
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
3756
};
3757
3758
static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
3759
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3760
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3761
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3762
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3763
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3764
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3765
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3766
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3767
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3768
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3769
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3770
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3771
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3772
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3773
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3774
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
3775
};
3776
3777
static const unsigned char xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
3778
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3779
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3780
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3781
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3782
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3783
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3784
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3785
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3786
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3787
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3788
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3789
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3790
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3791
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3792
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3793
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3794
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3795
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3796
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3797
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3798
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3799
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3800
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3801
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3802
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3803
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3804
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3805
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3806
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3807
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3808
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3809
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3810
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3811
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3812
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3813
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3814
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3815
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3816
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3817
};
3818
3819
3820
/*
3821
 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3822
 */
3823
3824
static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3825
    const unsigned char* in, int *inlen) {
3826
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3827
}
3828
static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3829
    const unsigned char* in, int *inlen) {
3830
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3831
}
3832
3833
static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3834
    const unsigned char* in, int *inlen) {
3835
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3836
}
3837
static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3838
    const unsigned char* in, int *inlen) {
3839
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3840
}
3841
3842
static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3843
    const unsigned char* in, int *inlen) {
3844
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3845
}
3846
static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3847
    const unsigned char* in, int *inlen) {
3848
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3849
}
3850
3851
static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3852
    const unsigned char* in, int *inlen) {
3853
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3854
}
3855
static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3856
    const unsigned char* in, int *inlen) {
3857
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3858
}
3859
3860
static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3861
    const unsigned char* in, int *inlen) {
3862
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3863
}
3864
static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3865
    const unsigned char* in, int *inlen) {
3866
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3867
}
3868
3869
static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3870
    const unsigned char* in, int *inlen) {
3871
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3872
}
3873
static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3874
    const unsigned char* in, int *inlen) {
3875
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3876
}
3877
3878
static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3879
    const unsigned char* in, int *inlen) {
3880
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3881
}
3882
static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3883
    const unsigned char* in, int *inlen) {
3884
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3885
}
3886
3887
static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3888
    const unsigned char* in, int *inlen) {
3889
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3890
}
3891
static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3892
    const unsigned char* in, int *inlen) {
3893
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3894
}
3895
3896
static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3897
    const unsigned char* in, int *inlen) {
3898
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3899
}
3900
static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3901
    const unsigned char* in, int *inlen) {
3902
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3903
}
3904
3905
static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3906
    const unsigned char* in, int *inlen) {
3907
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3908
}
3909
static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3910
    const unsigned char* in, int *inlen) {
3911
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3912
}
3913
3914
static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3915
    const unsigned char* in, int *inlen) {
3916
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3917
}
3918
static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3919
    const unsigned char* in, int *inlen) {
3920
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3921
}
3922
3923
static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3924
    const unsigned char* in, int *inlen) {
3925
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3926
}
3927
static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3928
    const unsigned char* in, int *inlen) {
3929
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3930
}
3931
3932
static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3933
    const unsigned char* in, int *inlen) {
3934
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3935
}
3936
static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3937
    const unsigned char* in, int *inlen) {
3938
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3939
}
3940
3941
static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3942
    const unsigned char* in, int *inlen) {
3943
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3944
}
3945
static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3946
    const unsigned char* in, int *inlen) {
3947
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3948
}
3949
3950
static void
3951
xmlRegisterCharEncodingHandlersISO8859x (void) {
3952
    xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3953
    xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3954
    xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3955
    xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3956
    xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3957
    xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3958
    xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3959
    xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3960
    xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3961
    xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3962
    xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3963
    xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3964
    xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3965
    xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3966
}
3967
3968
#endif
3969
#endif
3970