Coverage Report

Created: 2024-05-04 01:11

/src/libxml2/encoding.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * encoding.c : implements the encoding conversion functions needed for XML
3
 *
4
 * Related specs:
5
 * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6
 * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7
 * [ISO-10646]    UTF-8 and UTF-16 in Annexes
8
 * [ISO-8859-1]   ISO Latin-1 characters codes.
9
 * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
10
 *                Worldwide Character Encoding -- Version 1.0", Addison-
11
 *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
12
 *                described in Unicode Technical Report #4.
13
 * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
14
 *                Information Interchange, ANSI X3.4-1986.
15
 *
16
 * See Copyright for the status of this software.
17
 *
18
 * daniel@veillard.com
19
 *
20
 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
21
 */
22
23
#define IN_LIBXML
24
#include "libxml.h"
25
26
#include <string.h>
27
#include <limits.h>
28
#include <ctype.h>
29
#include <stdlib.h>
30
31
#ifdef LIBXML_ICONV_ENABLED
32
#include <errno.h>
33
#endif
34
35
#include <libxml/encoding.h>
36
#include <libxml/xmlmemory.h>
37
#ifdef LIBXML_HTML_ENABLED
38
#include <libxml/HTMLparser.h>
39
#endif
40
#include <libxml/globals.h>
41
#include <libxml/xmlerror.h>
42
43
#include "private/buf.h"
44
#include "private/enc.h"
45
#include "private/error.h"
46
47
#ifdef LIBXML_ICU_ENABLED
48
#include <unicode/ucnv.h>
49
/* Size of pivot buffer, same as icu/source/common/ucnv.cpp CHUNK_SIZE */
50
#define ICU_PIVOT_BUF_SIZE 1024
51
typedef struct _uconv_t uconv_t;
52
struct _uconv_t {
53
  UConverter *uconv; /* for conversion between an encoding and UTF-16 */
54
  UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */
55
  UChar      pivot_buf[ICU_PIVOT_BUF_SIZE];
56
  UChar      *pivot_source;
57
  UChar      *pivot_target;
58
};
59
#endif
60
61
/*
 * One entry of the encoding alias table: a pair of strings.
 * NOTE(review): presumably @alias is the alternative spelling and @name the
 * encoding it resolves to -- confirm against the alias registration code.
 */
typedef struct _xmlCharEncodingAlias {
    const char *name;
    const char *alias;
} xmlCharEncodingAlias, *xmlCharEncodingAliasPtr;

/*
 * File-private alias table state, all starting out empty.
 * NOTE(review): Nb/Max look like the used and allocated entry counts --
 * confirm against the code that grows the table.
 */
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
static int xmlCharEncodingAliasesNb = 0;
static int xmlCharEncodingAliasesMax = 0;
71
72
#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
73
#if 0
74
#define DEBUG_ENCODING  /* Define this to get encoding traces */
75
#endif
76
#else
77
#endif
78
79
/*
 * Host byte-order flag: non-zero means the machine stores 16-bit values
 * least-significant byte first.
 * NOTE(review): starts at 1; presumably re-detected at library
 * initialisation for big-endian hosts -- confirm.
 */
static int xmlLittleEndian = 1;
80
81
/**
82
 * xmlEncodingErrMemory:
83
 * @extra:  extra information
84
 *
85
 * Handle an out of memory condition
86
 */
87
static void
88
xmlEncodingErrMemory(const char *extra)
89
0
{
90
0
    __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
91
0
}
92
93
/**
94
 * xmlErrEncoding:
95
 * @error:  the error number
96
 * @msg:  the error message
97
 *
98
 * n encoding error
99
 */
100
static void LIBXML_ATTR_FORMAT(2,0)
101
xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
102
8.63k
{
103
8.63k
    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
104
8.63k
                    XML_FROM_I18N, error, XML_ERR_FATAL,
105
8.63k
                    NULL, 0, val, NULL, NULL, 0, 0, msg, val);
106
8.63k
}
107
108
#ifdef LIBXML_ICU_ENABLED
109
static uconv_t*
110
openIcuConverter(const char* name, int toUnicode)
111
{
112
  UErrorCode status = U_ZERO_ERROR;
113
  uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
114
  if (conv == NULL)
115
    return NULL;
116
117
  conv->pivot_source = conv->pivot_buf;
118
  conv->pivot_target = conv->pivot_buf;
119
120
  conv->uconv = ucnv_open(name, &status);
121
  if (U_FAILURE(status))
122
    goto error;
123
124
  status = U_ZERO_ERROR;
125
  if (toUnicode) {
126
    ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
127
                        NULL, NULL, NULL, &status);
128
  }
129
  else {
130
    ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
131
                        NULL, NULL, NULL, &status);
132
  }
133
  if (U_FAILURE(status))
134
    goto error;
135
136
  status = U_ZERO_ERROR;
137
  conv->utf8 = ucnv_open("UTF-8", &status);
138
  if (U_SUCCESS(status))
139
    return conv;
140
141
error:
142
  if (conv->uconv)
143
    ucnv_close(conv->uconv);
144
  xmlFree(conv);
145
  return NULL;
146
}
147
148
static void
149
closeIcuConverter(uconv_t *conv)
150
{
151
  if (conv != NULL) {
152
    ucnv_close(conv->uconv);
153
    ucnv_close(conv->utf8);
154
    xmlFree(conv);
155
  }
156
}
157
#endif /* LIBXML_ICU_ENABLED */
158
159
/************************************************************************
160
 *                  *
161
 *    Conversions To/From UTF8 encoding     *
162
 *                  *
163
 ************************************************************************/
164
165
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out. ASCII is a subset of UTF-8, so bytes below 0x80
 * are copied unchanged; conversion stops at the first byte with the
 * high bit set or when @out is full.
 *
 * Returns the number of bytes written if success, or -1 if a non-ASCII
 *     byte was met.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* base = in;
    const unsigned char* inend = in + (*inlen);
    unsigned char* outend = out + (*outlen);

    /*
     * Every input byte yields exactly one output byte, so one free byte
     * in @out is all that is needed to make progress. (The previous guard
     * required six free bytes, so short output buffers converted nothing.)
     */
    while ((in < inend) && (out < outend)) {
        unsigned int c = *in;

        if (c >= 0x80) {
            /* not ASCII: report what was converted before this byte */
            *outlen = out - outstart;
            *inlen = in - base;
            return(-1);
        }
        *out++ = c;
        in++;
    }
    *outlen = out - outstart;
    *inlen = in - base;
    return(*outlen);
}
209
210
#ifdef LIBXML_OUTPUT_ENABLED
211
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out. Calling with @in == NULL is the initialization
 * call: nothing is produced and both lengths are set to 0.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (malformed UTF-8 or a character outside ASCII), or -1 if a
 *     required pointer is NULL.
 * The value of @inlen after return is the number of octets consumed;
 *     on -2 it stops before the offending character.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c= d; trailing= 0; }
        else if (d < 0xC0)  goto transcoding_error; /* trailing byte in
                                                       leading position */
        else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
        else                goto transcoding_error; /* no chance for this
                                                       in Ascii */

        /* Sequence cut by the end of the chunk: wait for more input. */
        if (inend - in < trailing)
            break;

        /*
         * The check above guarantees all trailing bytes are readable.
         * A byte that is not 10xxxxxx makes the sequence malformed; it
         * must be reported instead of emitting the partial value.
         */
        for ( ; trailing; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80)
                goto transcoding_error;
            c = (c << 6) | (d & 0x3F);
        }

        /* assertion: c is a single UCS-4 value */
        if (c >= 0x80)
            goto transcoding_error;     /* no chance for this in Ascii */
        if (out >= outend)
            break;
        *out++ = c;
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(*outlen);

transcoding_error:
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(-2);
}
294
#endif /* LIBXML_OUTPUT_ENABLED */
295
296
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Convert a block of ISO Latin 1 chars to UTF-8. Bytes below 0x80 are
 * copied as is, every other byte expands to a two-byte sequence.
 *
 * Returns the number of bytes written if success, or -1 if any argument
 *     is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char *dst;
    unsigned char *dstEnd;
    const unsigned char *src;
    const unsigned char *srcEnd;
    const unsigned char *asciiLimit;

    if (out == NULL || in == NULL || outlen == NULL || inlen == NULL)
        return(-1);

    dst = out;
    src = in;
    dstEnd = out + *outlen;
    srcEnd = in + (*inlen);
    asciiLimit = srcEnd;

    /* Each round needs room for a possible two-byte sequence. */
    while ((src < srcEnd) && (dst < dstEnd - 1)) {
        if (*src >= 0x80) {
            *dst++ = (((*src) >> 6) & 0x1F) | 0xC0;
            *dst++ = ((*src) & 0x3F) | 0x80;
            src++;
        }
        /* Never let the ASCII run copy more than the space that is left. */
        if ((asciiLimit - src) > (dstEnd - dst))
            asciiLimit = src + (dstEnd - dst);
        for ( ; (src < asciiLimit) && (*src < 0x80); src++)
            *dst++ = *src;
    }
    /* A final single free byte can still take one ASCII char. */
    if ((src < srcEnd) && (dst < dstEnd) && (*src < 0x80))
        *dst++ = *src++;

    *outlen = dst - out;
    *inlen = src - in;
    return(*outlen);
}
344
345
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @in in UTF-8 chars
 *
 * Identity "conversion" for UTF-8: copies as many bytes as fit.
 * Calling with @inb == NULL is the initialization call and produces
 * nothing.
 *
 * Returns the number of bytes written, or -1 on a NULL argument or a
 *     negative length.
 *     The values of *outlen and *inlenb after return are both the number
 *     of octets copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int count;

    if (out == NULL || outlen == NULL || inlenb == NULL)
        return(-1);

    if (inb == NULL) {
        /* inb == NULL means output is initialized. */
        *outlen = 0;
        *inlenb = 0;
        return(0);
    }

    /* Copy the smaller of "what is available" and "what fits". */
    count = (*inlenb < *outlen) ? *inlenb : *outlen;
    if (count < 0)
        return(-1);

    /*
     * FIXME: Conversion functions must assure valid UTF-8, so we have
     * to check for UTF-8 validity. Preferably, this converter shouldn't
     * be used at all.
     */
    memcpy(out, inb, count);

    *outlen = count;
    *inlenb = count;
    return(*outlen);
}
391
392
393
#ifdef LIBXML_OUTPUT_ENABLED
394
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Convert a block of UTF-8 chars to ISO Latin 1. Calling with
 * @in == NULL is the initialization call and produces nothing.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (bad UTF-8 byte or a character above 0xFF), or -1 if a
 *     required pointer is NULL.
 * The value of @inlen after return is the number of octets consumed;
 *     on -2 it stops before the offending character.
 * The value of @outlen after return is the number of octets produced.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char *src = in;
    const unsigned char *committed = in;   /* end of last full character */
    const unsigned char *srcEnd;
    unsigned char *dst = out;
    const unsigned char *dstEnd;
    unsigned int code, byte;
    int extra;

    if (out == NULL || outlen == NULL || inlen == NULL)
        return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    srcEnd = in + (*inlen);
    dstEnd = out + (*outlen);

    while (src < srcEnd) {
        byte = *src++;

        /* Classify the lead byte and find how many bytes follow it. */
        if (byte < 0x80) {
            code = byte;
            extra = 0;
        } else if (byte < 0xC0) {
            goto failed;            /* trailing byte in leading position */
        } else if (byte < 0xE0) {
            code = byte & 0x1F;
            extra = 1;
        } else if (byte < 0xF0) {
            code = byte & 0x0F;
            extra = 2;
        } else if (byte < 0xF8) {
            code = byte & 0x07;
            extra = 3;
        } else {
            goto failed;            /* no chance for this in IsoLat1 */
        }

        /* Character split by the end of the chunk: stop before it. */
        if (srcEnd - src < extra)
            break;

        /* The check above makes every continuation byte readable. */
        while (extra != 0) {
            byte = *src++;
            if ((byte & 0xC0) != 0x80)
                goto failed;
            code <<= 6;
            code |= byte & 0x3F;
            extra--;
        }

        /* assertion: code is a single UCS-4 value */
        if (code > 0xFF)
            goto failed;            /* no chance for this in IsoLat1 */
        if (dst >= dstEnd)
            break;
        *dst++ = code;
        committed = src;
    }

    *outlen = dst - out;
    *inlen = committed - in;
    return(*outlen);

failed:
    *outlen = dst - out;
    *inlen = committed - in;
    return(-2);
}
483
#endif /* LIBXML_OUTPUT_ENABLED */
484
485
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is read byte by byte, so it works
 * for any alignment of @inb and on hosts of either byte order. An odd
 * trailing input byte is left unconsumed.
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 *     (unpaired surrogate in the input).
 *     The value of *inlenb after return is the number of octets consumed;
 *     on -2 it stops before the offending code unit.
 *     The value of *outlen after return is the number of octets produced.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* processed = inb;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int bits;

    if (*outlen == 0) {
        *inlenb = 0;
        return(0);
    }
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = in + *inlenb;

    /*
     * The "+ 5" slack guarantees room for the longest (4 byte) UTF-8
     * sequence, so no further space checks are needed in the body.
     */
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
        c = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
        in += 2;

        if ((c & 0xF800) == 0xD800) {    /* surrogates */
            if (c & 0x0400)              /* low surrogate without a lead */
                goto transcoding_error;
            if (in >= inend)             /* pair split between chunks */
                break;
            d = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
            in += 2;
            if ((d & 0xFC00) != 0xDC00)  /* lead without a low surrogate */
                goto transcoding_error;
            c = (((c & 0x03FF) << 10) | (d & 0x03FF)) + 0x10000;
        }

        /* assertion: c is a single UCS-4 value */
        if      (c <    0x80) {  *out++=  c;                bits= -6; }
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }

        for ( ; bits >= 0; bits-= 6)
            *out++= ((c >> bits) & 0x3F) | 0x80;

        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(*outlen);

transcoding_error:
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(-2);
}
577
578
#ifdef LIBXML_OUTPUT_ENABLED
579
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. No byte order mark is produced. The output is
 * written byte by byte, so @outb may have any alignment. Calling with
 * @in == NULL is the initialization call and produces nothing.
 *
 * Returns the number of bytes written, -1 if a required pointer is NULL,
 *     or -2 if the transcoding failed (malformed UTF-8 or a value above
 *     0x10FFFF).
 *     The value of *inlen after return is the number of octets consumed;
 *     on -2 it stops before the offending character.
 *     The value of *outlen after return is the number of octets produced.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend= in + *inlen;
    /* Only whole 16-bit units are written: an odd last byte is unusable. */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d= *in++;
        if      (d < 0x80)  { c= d; trailing= 0; }
        else if (d < 0xC0)  goto transcoding_error; /* trailing byte in
                                                       leading position */
        else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
        else                goto transcoding_error; /* no chance for this
                                                       in UTF-16 */

        /* Character split by the end of the chunk: stop before it. */
        if (inend - in < trailing)
            break;

        /*
         * All trailing bytes are readable thanks to the check above.
         * A byte that is not 10xxxxxx is a malformed sequence and must be
         * reported instead of emitting the partially decoded value.
         */
        for ( ; trailing; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80)
                goto transcoding_error;
            c = (c << 6) | (d & 0x3F);
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            out[0] = (unsigned char) (c & 0xFF);
            out[1] = (unsigned char) (c >> 8);
            out += 2;
        }
        else if (c < 0x110000) {
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            out[0] = (unsigned char) (hi & 0xFF);
            out[1] = (unsigned char) (hi >> 8);
            out[2] = (unsigned char) (lo & 0xFF);
            out[3] = (unsigned char) (lo >> 8);
            out += 4;
        }
        else
            goto transcoding_error;     /* beyond the Unicode range */
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

transcoding_error:
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
687
688
/**
689
 * UTF8ToUTF16:
690
 * @outb:  a pointer to an array of bytes to store the result
691
 * @outlen:  the length of @outb
692
 * @in:  a pointer to an array of UTF-8 chars
693
 * @inlen:  the length of @in
694
 *
695
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
696
 * block of chars out.
697
 *
698
 * Returns the number of bytes written, or -1 if lack of space, or -2
699
 *     if the transcoding failed.
700
 */
701
static int
702
UTF8ToUTF16(unsigned char* outb, int *outlen,
703
            const unsigned char* in, int *inlen)
704
57.7k
{
705
57.7k
    if (in == NULL) {
706
  /*
707
   * initialization, add the Byte Order Mark for UTF-16LE
708
   */
709
492
        if (*outlen >= 2) {
710
492
      outb[0] = 0xFF;
711
492
      outb[1] = 0xFE;
712
492
      *outlen = 2;
713
492
      *inlen = 0;
714
#ifdef DEBUG_ENCODING
715
            xmlGenericError(xmlGenericErrorContext,
716
        "Added FFFE Byte Order Mark\n");
717
#endif
718
492
      return(2);
719
492
  }
720
0
  *outlen = 0;
721
0
  *inlen = 0;
722
0
  return(0);
723
492
    }
724
57.2k
    return (UTF8ToUTF16LE(outb, outlen, in, inlen));
725
57.7k
}
726
#endif /* LIBXML_OUTPUT_ENABLED */
727
728
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is read byte by byte, so it works
 * for any alignment of @inb and on hosts of either byte order. An odd
 * trailing input byte is left unconsumed.
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 *     (unpaired surrogate in the input).
 * The value of *inlenb after return is the number of octets consumed;
 *     on -2 it stops before the offending code unit.
 * The value of *outlen after return is the number of octets produced.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* processed = inb;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int bits;

    if (*outlen == 0) {
        *inlenb = 0;
        return(0);
    }
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = in + *inlenb;

    /*
     * The "+ 5" slack guarantees room for the longest (4 byte) UTF-8
     * sequence, so no further space checks are needed in the body.
     */
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
        c = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
        in += 2;

        if ((c & 0xF800) == 0xD800) {    /* surrogates */
            if (c & 0x0400)              /* low surrogate without a lead */
                goto transcoding_error;
            if (in >= inend)             /* pair split between chunks */
                break;
            d = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
            in += 2;
            if ((d & 0xFC00) != 0xDC00)  /* lead without a low surrogate */
                goto transcoding_error;
            c = (((c & 0x03FF) << 10) | (d & 0x03FF)) + 0x10000;
        }

        /* assertion: c is a single UCS-4 value */
        if      (c <    0x80) {  *out++=  c;                bits= -6; }
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }

        for ( ; bits >= 0; bits-= 6)
            *out++= ((c >> bits) & 0x3F) | 0x80;

        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(*outlen);

transcoding_error:
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(-2);
}
820
821
#ifdef LIBXML_OUTPUT_ENABLED
822
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. No byte order mark is produced. The output is
 * written byte by byte, so @outb may have any alignment. Calling with
 * @in == NULL is the initialization call and produces nothing.
 *
 * Returns the number of bytes written, -1 if a required pointer is NULL,
 *     or -2 if the transcoding failed (malformed UTF-8 or a value above
 *     0x10FFFF).
 *     The value of *inlen after return is the number of octets consumed;
 *     on -2 it stops before the offending character.
 *     The value of *outlen after return is the number of octets (not
 *     16-bit units) produced, on every return path.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend= in + *inlen;
    /* Only whole 16-bit units are written: an odd last byte is unusable. */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d= *in++;
        if      (d < 0x80)  { c= d; trailing= 0; }
        else if (d < 0xC0)  goto transcoding_error; /* trailing byte in
                                                       leading position */
        else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
        else                goto transcoding_error; /* no chance for this
                                                       in UTF-16 */

        /* Character split by the end of the chunk: stop before it. */
        if (inend - in < trailing)
            break;

        /*
         * All trailing bytes are readable thanks to the check above.
         * A byte that is not 10xxxxxx is a malformed sequence and must be
         * reported instead of emitting the partially decoded value.
         */
        for ( ; trailing; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80)
                goto transcoding_error;
            c = (c << 6) | (d & 0x3F);
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            out[0] = (unsigned char) (c >> 8);
            out[1] = (unsigned char) (c & 0xFF);
            out += 2;
        }
        else if (c < 0x110000) {
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            out[0] = (unsigned char) (hi >> 8);
            out[1] = (unsigned char) (hi & 0xFF);
            out[2] = (unsigned char) (lo >> 8);
            out[3] = (unsigned char) (lo & 0xFF);
            out += 4;
        }
        else
            goto transcoding_error;     /* beyond the Unicode range */
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

transcoding_error:
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
927
#endif /* LIBXML_OUTPUT_ENABLED */
928
929
/************************************************************************
930
 *                  *
931
 *    Generic encoding handling routines      *
932
 *                  *
933
 ************************************************************************/
934
935
/**
936
 * xmlDetectCharEncoding:
937
 * @in:  a pointer to the first bytes of the XML entity, must be at least
938
 *       2 bytes long (at least 4 if encoding is UTF4 variant).
939
 * @len:  pointer to the length of the buffer
940
 *
941
 * Guess the encoding of the entity using the first bytes of the entity content
942
 * according to the non-normative appendix F of the XML-1.0 recommendation.
943
 *
944
 * Returns one of the XML_CHAR_ENCODING_... values.
945
 */
946
xmlCharEncoding
947
xmlDetectCharEncoding(const unsigned char* in, int len)
948
544k
{
949
544k
    if (in == NULL)
950
0
        return(XML_CHAR_ENCODING_NONE);
951
544k
    if (len >= 4) {
952
544k
  if ((in[0] == 0x00) && (in[1] == 0x00) &&
953
544k
      (in[2] == 0x00) && (in[3] == 0x3C))
954
168
      return(XML_CHAR_ENCODING_UCS4BE);
955
544k
  if ((in[0] == 0x3C) && (in[1] == 0x00) &&
956
544k
      (in[2] == 0x00) && (in[3] == 0x00))
957
174
      return(XML_CHAR_ENCODING_UCS4LE);
958
544k
  if ((in[0] == 0x00) && (in[1] == 0x00) &&
959
544k
      (in[2] == 0x3C) && (in[3] == 0x00))
960
80
      return(XML_CHAR_ENCODING_UCS4_2143);
961
544k
  if ((in[0] == 0x00) && (in[1] == 0x3C) &&
962
544k
      (in[2] == 0x00) && (in[3] == 0x00))
963
87
      return(XML_CHAR_ENCODING_UCS4_3412);
964
544k
  if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
965
544k
      (in[2] == 0xA7) && (in[3] == 0x94))
966
3.02k
      return(XML_CHAR_ENCODING_EBCDIC);
967
541k
  if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
968
541k
      (in[2] == 0x78) && (in[3] == 0x6D))
969
215k
      return(XML_CHAR_ENCODING_UTF8);
970
  /*
971
   * Although not part of the recommendation, we also
972
   * attempt an "auto-recognition" of UTF-16LE and
973
   * UTF-16BE encodings.
974
   */
975
325k
  if ((in[0] == 0x3C) && (in[1] == 0x00) &&
976
325k
      (in[2] == 0x3F) && (in[3] == 0x00))
977
1.06k
      return(XML_CHAR_ENCODING_UTF16LE);
978
324k
  if ((in[0] == 0x00) && (in[1] == 0x3C) &&
979
324k
      (in[2] == 0x00) && (in[3] == 0x3F))
980
922
      return(XML_CHAR_ENCODING_UTF16BE);
981
324k
    }
982
323k
    if (len >= 3) {
983
  /*
984
   * Errata on XML-1.0 June 20 2001
985
   * We now allow an UTF8 encoded BOM
986
   */
987
323k
  if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
988
323k
      (in[2] == 0xBF))
989
4.38k
      return(XML_CHAR_ENCODING_UTF8);
990
323k
    }
991
    /* For UTF-16 we can recognize by the BOM */
992
319k
    if (len >= 2) {
993
319k
  if ((in[0] == 0xFE) && (in[1] == 0xFF))
994
2.13k
      return(XML_CHAR_ENCODING_UTF16BE);
995
317k
  if ((in[0] == 0xFF) && (in[1] == 0xFE))
996
2.70k
      return(XML_CHAR_ENCODING_UTF16LE);
997
317k
    }
998
314k
    return(XML_CHAR_ENCODING_NONE);
999
319k
}
1000
1001
/**
1002
 * xmlCleanupEncodingAliases:
1003
 *
1004
 * Unregisters all aliases
1005
 */
1006
void
1007
0
xmlCleanupEncodingAliases(void) {
1008
0
    int i;
1009
1010
0
    if (xmlCharEncodingAliases == NULL)
1011
0
  return;
1012
1013
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1014
0
  if (xmlCharEncodingAliases[i].name != NULL)
1015
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1016
0
  if (xmlCharEncodingAliases[i].alias != NULL)
1017
0
      xmlFree((char *) xmlCharEncodingAliases[i].alias);
1018
0
    }
1019
0
    xmlCharEncodingAliasesNb = 0;
1020
0
    xmlCharEncodingAliasesMax = 0;
1021
0
    xmlFree(xmlCharEncodingAliases);
1022
0
    xmlCharEncodingAliases = NULL;
1023
0
}
1024
1025
/**
1026
 * xmlGetEncodingAlias:
1027
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1028
 *
1029
 * Lookup an encoding name for the given alias.
1030
 *
1031
 * Returns NULL if not found, otherwise the original name
1032
 */
1033
const char *
1034
57.7k
xmlGetEncodingAlias(const char *alias) {
1035
57.7k
    int i;
1036
57.7k
    char upper[100];
1037
1038
57.7k
    if (alias == NULL)
1039
0
  return(NULL);
1040
1041
57.7k
    if (xmlCharEncodingAliases == NULL)
1042
57.7k
  return(NULL);
1043
1044
0
    for (i = 0;i < 99;i++) {
1045
0
        upper[i] = toupper(alias[i]);
1046
0
  if (upper[i] == 0) break;
1047
0
    }
1048
0
    upper[i] = 0;
1049
1050
    /*
1051
     * Walk down the list looking for a definition of the alias
1052
     */
1053
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1054
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1055
0
      return(xmlCharEncodingAliases[i].name);
1056
0
  }
1057
0
    }
1058
0
    return(NULL);
1059
0
}
1060
1061
/**
1062
 * xmlAddEncodingAlias:
1063
 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1064
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1065
 *
1066
 * Registers an alias @alias for an encoding named @name. Existing alias
1067
 * will be overwritten.
1068
 *
1069
 * Returns 0 in case of success, -1 in case of error
1070
 */
1071
int
1072
0
xmlAddEncodingAlias(const char *name, const char *alias) {
1073
0
    int i;
1074
0
    char upper[100];
1075
1076
0
    if ((name == NULL) || (alias == NULL))
1077
0
  return(-1);
1078
1079
0
    for (i = 0;i < 99;i++) {
1080
0
        upper[i] = toupper(alias[i]);
1081
0
  if (upper[i] == 0) break;
1082
0
    }
1083
0
    upper[i] = 0;
1084
1085
0
    if (xmlCharEncodingAliases == NULL) {
1086
0
  xmlCharEncodingAliasesNb = 0;
1087
0
  xmlCharEncodingAliasesMax = 20;
1088
0
  xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1089
0
        xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1090
0
  if (xmlCharEncodingAliases == NULL)
1091
0
      return(-1);
1092
0
    } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1093
0
  xmlCharEncodingAliasesMax *= 2;
1094
0
  xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1095
0
        xmlRealloc(xmlCharEncodingAliases,
1096
0
             xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1097
0
    }
1098
    /*
1099
     * Walk down the list looking for a definition of the alias
1100
     */
1101
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1102
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1103
      /*
1104
       * Replace the definition.
1105
       */
1106
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1107
0
      xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1108
0
      return(0);
1109
0
  }
1110
0
    }
1111
    /*
1112
     * Add the definition
1113
     */
1114
0
    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1115
0
    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1116
0
    xmlCharEncodingAliasesNb++;
1117
0
    return(0);
1118
0
}
1119
1120
/**
1121
 * xmlDelEncodingAlias:
1122
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1123
 *
1124
 * Unregisters an encoding alias @alias
1125
 *
1126
 * Returns 0 in case of success, -1 in case of error
1127
 */
1128
int
1129
0
xmlDelEncodingAlias(const char *alias) {
1130
0
    int i;
1131
1132
0
    if (alias == NULL)
1133
0
  return(-1);
1134
1135
0
    if (xmlCharEncodingAliases == NULL)
1136
0
  return(-1);
1137
    /*
1138
     * Walk down the list looking for a definition of the alias
1139
     */
1140
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1141
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1142
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1143
0
      xmlFree((char *) xmlCharEncodingAliases[i].alias);
1144
0
      xmlCharEncodingAliasesNb--;
1145
0
      memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1146
0
        sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1147
0
      return(0);
1148
0
  }
1149
0
    }
1150
0
    return(-1);
1151
0
}
1152
1153
/**
1154
 * xmlParseCharEncoding:
1155
 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1156
 *
1157
 * Compare the string to the encoding schemes already known. Note
1158
 * that the comparison is case insensitive accordingly to the section
1159
 * [XML] 4.3.3 Character Encoding in Entities.
1160
 *
1161
 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1162
 * if not recognized.
1163
 */
1164
xmlCharEncoding
1165
xmlParseCharEncoding(const char* name)
1166
81.4k
{
1167
81.4k
    const char *alias;
1168
81.4k
    char upper[500];
1169
81.4k
    int i;
1170
1171
81.4k
    if (name == NULL)
1172
66.8k
  return(XML_CHAR_ENCODING_NONE);
1173
1174
    /*
1175
     * Do the alias resolution
1176
     */
1177
14.5k
    alias = xmlGetEncodingAlias(name);
1178
14.5k
    if (alias != NULL)
1179
0
  name = alias;
1180
1181
144k
    for (i = 0;i < 499;i++) {
1182
144k
        upper[i] = toupper(name[i]);
1183
144k
  if (upper[i] == 0) break;
1184
144k
    }
1185
14.5k
    upper[i] = 0;
1186
1187
14.5k
    if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1188
14.5k
    if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1189
12.1k
    if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1190
1191
    /*
1192
     * NOTE: if we were able to parse this, the endianness of UTF16 is
1193
     *       already found and in use
1194
     */
1195
12.0k
    if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1196
11.5k
    if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1197
1198
11.5k
    if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1199
11.5k
    if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1200
11.5k
    if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1201
1202
    /*
1203
     * NOTE: if we were able to parse this, the endianness of UCS4 is
1204
     *       already found and in use
1205
     */
1206
11.4k
    if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1207
11.0k
    if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1208
11.0k
    if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1209
1210
1211
11.0k
    if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1212
8.88k
    if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1213
8.85k
    if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1214
1215
8.85k
    if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1216
8.83k
    if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1217
8.81k
    if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1218
1219
8.81k
    if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1220
8.79k
    if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1221
8.77k
    if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1222
8.65k
    if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1223
8.62k
    if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1224
8.61k
    if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1225
8.59k
    if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1226
1227
8.58k
    if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1228
8.58k
    if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1229
8.58k
    if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1230
1231
#ifdef DEBUG_ENCODING
1232
    xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1233
#endif
1234
8.50k
    return(XML_CHAR_ENCODING_ERROR);
1235
8.58k
}
1236
1237
/**
1238
 * xmlGetCharEncodingName:
1239
 * @enc:  the encoding
1240
 *
1241
 * The "canonical" name for XML encoding.
1242
 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1243
 * Section 4.3.3  Character Encoding in Entities
1244
 *
1245
 * Returns the canonical name for the given encoding
1246
 */
1247
1248
const char*
1249
674
xmlGetCharEncodingName(xmlCharEncoding enc) {
1250
674
    switch (enc) {
1251
0
        case XML_CHAR_ENCODING_ERROR:
1252
0
      return(NULL);
1253
0
        case XML_CHAR_ENCODING_NONE:
1254
0
      return(NULL);
1255
0
        case XML_CHAR_ENCODING_UTF8:
1256
0
      return("UTF-8");
1257
0
        case XML_CHAR_ENCODING_UTF16LE:
1258
0
      return("UTF-16");
1259
0
        case XML_CHAR_ENCODING_UTF16BE:
1260
0
      return("UTF-16");
1261
0
        case XML_CHAR_ENCODING_EBCDIC:
1262
0
            return("EBCDIC");
1263
444
        case XML_CHAR_ENCODING_UCS4LE:
1264
444
            return("ISO-10646-UCS-4");
1265
0
        case XML_CHAR_ENCODING_UCS4BE:
1266
0
            return("ISO-10646-UCS-4");
1267
80
        case XML_CHAR_ENCODING_UCS4_2143:
1268
80
            return("ISO-10646-UCS-4");
1269
87
        case XML_CHAR_ENCODING_UCS4_3412:
1270
87
            return("ISO-10646-UCS-4");
1271
24
        case XML_CHAR_ENCODING_UCS2:
1272
24
            return("ISO-10646-UCS-2");
1273
18
        case XML_CHAR_ENCODING_8859_1:
1274
18
      return("ISO-8859-1");
1275
21
        case XML_CHAR_ENCODING_8859_2:
1276
21
      return("ISO-8859-2");
1277
0
        case XML_CHAR_ENCODING_8859_3:
1278
0
      return("ISO-8859-3");
1279
0
        case XML_CHAR_ENCODING_8859_4:
1280
0
      return("ISO-8859-4");
1281
0
        case XML_CHAR_ENCODING_8859_5:
1282
0
      return("ISO-8859-5");
1283
0
        case XML_CHAR_ENCODING_8859_6:
1284
0
      return("ISO-8859-6");
1285
0
        case XML_CHAR_ENCODING_8859_7:
1286
0
      return("ISO-8859-7");
1287
0
        case XML_CHAR_ENCODING_8859_8:
1288
0
      return("ISO-8859-8");
1289
0
        case XML_CHAR_ENCODING_8859_9:
1290
0
      return("ISO-8859-9");
1291
0
        case XML_CHAR_ENCODING_2022_JP:
1292
0
            return("ISO-2022-JP");
1293
0
        case XML_CHAR_ENCODING_SHIFT_JIS:
1294
0
            return("Shift-JIS");
1295
0
        case XML_CHAR_ENCODING_EUC_JP:
1296
0
            return("EUC-JP");
1297
0
  case XML_CHAR_ENCODING_ASCII:
1298
0
      return(NULL);
1299
674
    }
1300
0
    return(NULL);
1301
674
}
1302
1303
/************************************************************************
1304
 *                  *
1305
 *      Char encoding handlers        *
1306
 *                  *
1307
 ************************************************************************/
1308
1309
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) && \
1310
    defined(LIBXML_ISO8859X_ENABLED)
1311
1312
#define DECLARE_ISO_FUNCS(n) \
1313
    static int ISO8859_##n##ToUTF8(unsigned char* out, int *outlen, \
1314
                                   const unsigned char* in, int *inlen); \
1315
    static int UTF8ToISO8859_##n(unsigned char* out, int *outlen, \
1316
                                 const unsigned char* in, int *inlen);
1317
1318
/** DOC_DISABLE */
1319
DECLARE_ISO_FUNCS(2)
1320
DECLARE_ISO_FUNCS(3)
1321
DECLARE_ISO_FUNCS(4)
1322
DECLARE_ISO_FUNCS(5)
1323
DECLARE_ISO_FUNCS(6)
1324
DECLARE_ISO_FUNCS(7)
1325
DECLARE_ISO_FUNCS(8)
1326
DECLARE_ISO_FUNCS(9)
1327
DECLARE_ISO_FUNCS(10)
1328
DECLARE_ISO_FUNCS(11)
1329
DECLARE_ISO_FUNCS(13)
1330
DECLARE_ISO_FUNCS(14)
1331
DECLARE_ISO_FUNCS(15)
1332
DECLARE_ISO_FUNCS(16)
1333
/** DOC_ENABLE */
1334
1335
#endif /* LIBXML_ISO8859X_ENABLED */
1336
1337
#ifdef LIBXML_ICONV_ENABLED
1338
  #define EMPTY_ICONV , (iconv_t) 0, (iconv_t) 0
1339
#else
1340
  #define EMPTY_ICONV
1341
#endif
1342
1343
/*
 * Extra initializers for the uconv_in/uconv_out handler fields. Those
 * fields are only referenced under LIBXML_ICU_ENABLED in this file, so
 * the padding has to be selected by the same switch.
 */
#ifdef LIBXML_ICU_ENABLED
  #define EMPTY_UCONV , NULL, NULL
#else
  #define EMPTY_UCONV
#endif
1348
1349
#define MAKE_HANDLER(name, in, out) \
1350
    { (char *) name, in, out EMPTY_ICONV EMPTY_UCONV }
1351
1352
static const xmlCharEncodingHandler defaultHandlers[] = {
1353
    MAKE_HANDLER("UTF-8", UTF8ToUTF8, UTF8ToUTF8)
1354
#ifdef LIBXML_OUTPUT_ENABLED
1355
    ,MAKE_HANDLER("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE)
1356
    ,MAKE_HANDLER("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE)
1357
    ,MAKE_HANDLER("UTF-16", UTF16LEToUTF8, UTF8ToUTF16)
1358
    ,MAKE_HANDLER("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1)
1359
    ,MAKE_HANDLER("ASCII", asciiToUTF8, UTF8Toascii)
1360
    ,MAKE_HANDLER("US-ASCII", asciiToUTF8, UTF8Toascii)
1361
#ifdef LIBXML_HTML_ENABLED
1362
    ,MAKE_HANDLER("HTML", NULL, UTF8ToHtml)
1363
#endif
1364
#else
1365
    ,MAKE_HANDLER("UTF-16LE", UTF16LEToUTF8, NULL)
1366
    ,MAKE_HANDLER("UTF-16BE", UTF16BEToUTF8, NULL)
1367
    ,MAKE_HANDLER("UTF-16", UTF16LEToUTF8, NULL)
1368
    ,MAKE_HANDLER("ISO-8859-1", isolat1ToUTF8, NULL)
1369
    ,MAKE_HANDLER("ASCII", asciiToUTF8, NULL)
1370
    ,MAKE_HANDLER("US-ASCII", asciiToUTF8, NULL)
1371
#endif /* LIBXML_OUTPUT_ENABLED */
1372
1373
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) && \
1374
    defined(LIBXML_ISO8859X_ENABLED)
1375
    ,MAKE_HANDLER("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2)
1376
    ,MAKE_HANDLER("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3)
1377
    ,MAKE_HANDLER("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4)
1378
    ,MAKE_HANDLER("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5)
1379
    ,MAKE_HANDLER("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6)
1380
    ,MAKE_HANDLER("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7)
1381
    ,MAKE_HANDLER("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8)
1382
    ,MAKE_HANDLER("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9)
1383
    ,MAKE_HANDLER("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10)
1384
    ,MAKE_HANDLER("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11)
1385
    ,MAKE_HANDLER("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13)
1386
    ,MAKE_HANDLER("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14)
1387
    ,MAKE_HANDLER("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15)
1388
    ,MAKE_HANDLER("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16)
1389
#endif
1390
};
1391
1392
#define NUM_DEFAULT_HANDLERS \
1393
499k
    (sizeof(defaultHandlers) / sizeof(defaultHandlers[0]))
1394
1395
static const xmlCharEncodingHandler *xmlUTF16LEHandler = &defaultHandlers[1];
1396
static const xmlCharEncodingHandler *xmlUTF16BEHandler = &defaultHandlers[2];
1397
1398
/* the size should be growable, but it's not a big deal ... */
1399
0
#define MAX_ENCODING_HANDLERS 50
1400
static xmlCharEncodingHandlerPtr *handlers = NULL;
1401
static int nbCharEncodingHandler = 0;
1402
1403
/**
1404
 * xmlNewCharEncodingHandler:
1405
 * @name:  the encoding name, in UTF-8 format (ASCII actually)
1406
 * @input:  the xmlCharEncodingInputFunc to read that encoding
1407
 * @output:  the xmlCharEncodingOutputFunc to write that encoding
1408
 *
1409
 * Create and registers an xmlCharEncodingHandler.
1410
 *
1411
 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1412
 */
1413
xmlCharEncodingHandlerPtr
1414
xmlNewCharEncodingHandler(const char *name,
1415
                          xmlCharEncodingInputFunc input,
1416
0
                          xmlCharEncodingOutputFunc output) {
1417
0
    xmlCharEncodingHandlerPtr handler;
1418
0
    const char *alias;
1419
0
    char upper[500];
1420
0
    int i;
1421
0
    char *up = NULL;
1422
1423
    /*
1424
     * Do the alias resolution
1425
     */
1426
0
    alias = xmlGetEncodingAlias(name);
1427
0
    if (alias != NULL)
1428
0
  name = alias;
1429
1430
    /*
1431
     * Keep only the uppercase version of the encoding.
1432
     */
1433
0
    if (name == NULL) {
1434
0
        xmlEncodingErr(XML_I18N_NO_NAME,
1435
0
           "xmlNewCharEncodingHandler : no name !\n", NULL);
1436
0
  return(NULL);
1437
0
    }
1438
0
    for (i = 0;i < 499;i++) {
1439
0
        upper[i] = toupper(name[i]);
1440
0
  if (upper[i] == 0) break;
1441
0
    }
1442
0
    upper[i] = 0;
1443
0
    up = xmlMemStrdup(upper);
1444
0
    if (up == NULL) {
1445
0
        xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1446
0
  return(NULL);
1447
0
    }
1448
1449
    /*
1450
     * allocate and fill-up an handler block.
1451
     */
1452
0
    handler = (xmlCharEncodingHandlerPtr)
1453
0
              xmlMalloc(sizeof(xmlCharEncodingHandler));
1454
0
    if (handler == NULL) {
1455
0
        xmlFree(up);
1456
0
        xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1457
0
  return(NULL);
1458
0
    }
1459
0
    memset(handler, 0, sizeof(xmlCharEncodingHandler));
1460
0
    handler->input = input;
1461
0
    handler->output = output;
1462
0
    handler->name = up;
1463
1464
0
#ifdef LIBXML_ICONV_ENABLED
1465
0
    handler->iconv_in = NULL;
1466
0
    handler->iconv_out = NULL;
1467
0
#endif
1468
#ifdef LIBXML_ICU_ENABLED
1469
    handler->uconv_in = NULL;
1470
    handler->uconv_out = NULL;
1471
#endif
1472
1473
    /*
1474
     * registers and returns the handler.
1475
     */
1476
0
    xmlRegisterCharEncodingHandler(handler);
1477
#ifdef DEBUG_ENCODING
1478
    xmlGenericError(xmlGenericErrorContext,
1479
      "Registered encoding handler for %s\n", name);
1480
#endif
1481
0
    return(handler);
1482
0
}
1483
1484
/**
1485
 * xmlInitCharEncodingHandlers:
1486
 *
1487
 * DEPRECATED: Alias for xmlInitParser; this function only forwards to it.
1488
 */
1489
void
1490
0
xmlInitCharEncodingHandlers(void) {
1491
0
    xmlInitParser();
1492
0
}
1493
1494
/**
1495
 * xmlInitEncodingInternal:
1496
 *
1497
 * Initialize the char encoding support.
1498
 */
1499
void
1500
2.32k
xmlInitEncodingInternal(void) {
1501
2.32k
    unsigned short int tst = 0x1234;
1502
2.32k
    unsigned char *ptr = (unsigned char *) &tst;
1503
1504
2.32k
    if (*ptr == 0x12) xmlLittleEndian = 0;
1505
2.32k
    else if (*ptr == 0x34) xmlLittleEndian = 1;
1506
0
    else {
1507
0
        xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1508
0
                 "Odd problem at endianness detection\n", NULL);
1509
0
    }
1510
2.32k
}
1511
1512
/**
1513
 * xmlCleanupCharEncodingHandlers:
1514
 *
1515
 * DEPRECATED: This function will be made private. Call xmlCleanupParser
1516
 * to free global state but see the warnings there. xmlCleanupParser
1517
 * should be only called once at program exit. In most cases, you don't
1518
 * have call cleanup functions at all.
1519
 *
1520
 * Cleanup the memory allocated for the char encoding support, it
1521
 * unregisters all the encoding handlers and the aliases.
1522
 */
1523
void
1524
0
xmlCleanupCharEncodingHandlers(void) {
1525
0
    xmlCleanupEncodingAliases();
1526
1527
0
    if (handlers == NULL) return;
1528
1529
0
    for (;nbCharEncodingHandler > 0;) {
1530
0
        nbCharEncodingHandler--;
1531
0
  if (handlers[nbCharEncodingHandler] != NULL) {
1532
0
      if (handlers[nbCharEncodingHandler]->name != NULL)
1533
0
    xmlFree(handlers[nbCharEncodingHandler]->name);
1534
0
      xmlFree(handlers[nbCharEncodingHandler]);
1535
0
  }
1536
0
    }
1537
0
    xmlFree(handlers);
1538
0
    handlers = NULL;
1539
0
    nbCharEncodingHandler = 0;
1540
0
}
1541
1542
/**
1543
 * xmlRegisterCharEncodingHandler:
1544
 * @handler:  the xmlCharEncodingHandlerPtr handler block
1545
 *
1546
 * Register the char encoding handler, surprising, isn't it ?
1547
 */
1548
void
1549
0
xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1550
0
    if (handler == NULL) {
1551
0
        xmlEncodingErr(XML_I18N_NO_HANDLER,
1552
0
    "xmlRegisterCharEncodingHandler: NULL handler\n", NULL);
1553
0
        return;
1554
0
    }
1555
0
    if (handlers == NULL) {
1556
0
        handlers = xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(handlers[0]));
1557
0
        if (handlers == NULL) {
1558
0
            xmlEncodingErrMemory("allocating handler table");
1559
0
            goto free_handler;
1560
0
        }
1561
0
    }
1562
1563
0
    if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1564
0
        xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1565
0
  "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1566
0
                 "MAX_ENCODING_HANDLERS");
1567
0
        goto free_handler;
1568
0
    }
1569
0
    handlers[nbCharEncodingHandler++] = handler;
1570
0
    return;
1571
1572
0
free_handler:
1573
0
    if (handler != NULL) {
1574
0
        if (handler->name != NULL) {
1575
0
            xmlFree(handler->name);
1576
0
        }
1577
0
        xmlFree(handler);
1578
0
    }
1579
0
}
1580
1581
/**
1582
 * xmlGetCharEncodingHandler:
1583
 * @enc:  an xmlCharEncoding value.
1584
 *
1585
 * Search in the registered set the handler able to read/write that encoding.
1586
 *
1587
 * Returns the handler or NULL if not found
1588
 */
1589
xmlCharEncodingHandlerPtr
1590
688k
xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1591
688k
    xmlCharEncodingHandlerPtr handler;
1592
1593
688k
    switch (enc) {
1594
0
        case XML_CHAR_ENCODING_ERROR:
1595
0
      return(NULL);
1596
613k
        case XML_CHAR_ENCODING_NONE:
1597
613k
      return(NULL);
1598
62.6k
        case XML_CHAR_ENCODING_UTF8:
1599
62.6k
      return(NULL);
1600
4.68k
        case XML_CHAR_ENCODING_UTF16LE:
1601
4.68k
      return((xmlCharEncodingHandlerPtr) xmlUTF16LEHandler);
1602
3.47k
        case XML_CHAR_ENCODING_UTF16BE:
1603
3.47k
      return((xmlCharEncodingHandlerPtr) xmlUTF16BEHandler);
1604
3.49k
        case XML_CHAR_ENCODING_EBCDIC:
1605
3.49k
            handler = xmlFindCharEncodingHandler("EBCDIC");
1606
3.49k
            if (handler != NULL) return(handler);
1607
3.49k
            handler = xmlFindCharEncodingHandler("ebcdic");
1608
3.49k
            if (handler != NULL) return(handler);
1609
3.49k
            handler = xmlFindCharEncodingHandler("EBCDIC-US");
1610
3.49k
            if (handler != NULL) return(handler);
1611
0
            handler = xmlFindCharEncodingHandler("IBM-037");
1612
0
            if (handler != NULL) return(handler);
1613
0
      break;
1614
224
        case XML_CHAR_ENCODING_UCS4BE:
1615
224
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1616
224
            if (handler != NULL) return(handler);
1617
224
            handler = xmlFindCharEncodingHandler("UCS-4");
1618
224
            if (handler != NULL) return(handler);
1619
0
            handler = xmlFindCharEncodingHandler("UCS4");
1620
0
            if (handler != NULL) return(handler);
1621
0
      break;
1622
220
        case XML_CHAR_ENCODING_UCS4LE:
1623
220
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1624
220
            if (handler != NULL) return(handler);
1625
220
            handler = xmlFindCharEncodingHandler("UCS-4");
1626
220
            if (handler != NULL) return(handler);
1627
0
            handler = xmlFindCharEncodingHandler("UCS4");
1628
0
            if (handler != NULL) return(handler);
1629
0
      break;
1630
99
        case XML_CHAR_ENCODING_UCS4_2143:
1631
99
      break;
1632
106
        case XML_CHAR_ENCODING_UCS4_3412:
1633
106
      break;
1634
0
        case XML_CHAR_ENCODING_UCS2:
1635
0
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1636
0
            if (handler != NULL) return(handler);
1637
0
            handler = xmlFindCharEncodingHandler("UCS-2");
1638
0
            if (handler != NULL) return(handler);
1639
0
            handler = xmlFindCharEncodingHandler("UCS2");
1640
0
            if (handler != NULL) return(handler);
1641
0
      break;
1642
1643
      /*
1644
       * We used to keep ISO Latin encodings native in the
1645
       * generated data. This led to so many problems that
1646
       * this has been removed. One can still change this
1647
       * back by registering no-ops encoders for those
1648
       */
1649
0
        case XML_CHAR_ENCODING_8859_1:
1650
0
      handler = xmlFindCharEncodingHandler("ISO-8859-1");
1651
0
      if (handler != NULL) return(handler);
1652
0
      break;
1653
0
        case XML_CHAR_ENCODING_8859_2:
1654
0
      handler = xmlFindCharEncodingHandler("ISO-8859-2");
1655
0
      if (handler != NULL) return(handler);
1656
0
      break;
1657
0
        case XML_CHAR_ENCODING_8859_3:
1658
0
      handler = xmlFindCharEncodingHandler("ISO-8859-3");
1659
0
      if (handler != NULL) return(handler);
1660
0
      break;
1661
0
        case XML_CHAR_ENCODING_8859_4:
1662
0
      handler = xmlFindCharEncodingHandler("ISO-8859-4");
1663
0
      if (handler != NULL) return(handler);
1664
0
      break;
1665
0
        case XML_CHAR_ENCODING_8859_5:
1666
0
      handler = xmlFindCharEncodingHandler("ISO-8859-5");
1667
0
      if (handler != NULL) return(handler);
1668
0
      break;
1669
0
        case XML_CHAR_ENCODING_8859_6:
1670
0
      handler = xmlFindCharEncodingHandler("ISO-8859-6");
1671
0
      if (handler != NULL) return(handler);
1672
0
      break;
1673
0
        case XML_CHAR_ENCODING_8859_7:
1674
0
      handler = xmlFindCharEncodingHandler("ISO-8859-7");
1675
0
      if (handler != NULL) return(handler);
1676
0
      break;
1677
0
        case XML_CHAR_ENCODING_8859_8:
1678
0
      handler = xmlFindCharEncodingHandler("ISO-8859-8");
1679
0
      if (handler != NULL) return(handler);
1680
0
      break;
1681
0
        case XML_CHAR_ENCODING_8859_9:
1682
0
      handler = xmlFindCharEncodingHandler("ISO-8859-9");
1683
0
      if (handler != NULL) return(handler);
1684
0
      break;
1685
1686
1687
0
        case XML_CHAR_ENCODING_2022_JP:
1688
0
            handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1689
0
            if (handler != NULL) return(handler);
1690
0
      break;
1691
0
        case XML_CHAR_ENCODING_SHIFT_JIS:
1692
0
            handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1693
0
            if (handler != NULL) return(handler);
1694
0
            handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1695
0
            if (handler != NULL) return(handler);
1696
0
            handler = xmlFindCharEncodingHandler("Shift_JIS");
1697
0
            if (handler != NULL) return(handler);
1698
0
      break;
1699
0
        case XML_CHAR_ENCODING_EUC_JP:
1700
0
            handler = xmlFindCharEncodingHandler("EUC-JP");
1701
0
            if (handler != NULL) return(handler);
1702
0
      break;
1703
0
  default:
1704
0
      break;
1705
688k
    }
1706
1707
#ifdef DEBUG_ENCODING
1708
    xmlGenericError(xmlGenericErrorContext,
1709
      "No handler found for encoding %d\n", enc);
1710
#endif
1711
205
    return(NULL);
1712
688k
}
1713
1714
/**
1715
 * xmlFindCharEncodingHandler:
1716
 * @name:  a string describing the char encoding.
1717
 *
1718
 * Search in the registered set the handler able to read/write that encoding
1719
 * or create a new one.
1720
 *
1721
 * Returns the handler or NULL if not found
1722
 */
1723
xmlCharEncodingHandlerPtr
1724
43.1k
xmlFindCharEncodingHandler(const char *name) {
1725
43.1k
    const char *nalias;
1726
43.1k
    const char *norig;
1727
43.1k
    xmlCharEncoding alias;
1728
43.1k
#ifdef LIBXML_ICONV_ENABLED
1729
43.1k
    xmlCharEncodingHandlerPtr enc;
1730
43.1k
    iconv_t icv_in, icv_out;
1731
43.1k
#endif /* LIBXML_ICONV_ENABLED */
1732
#ifdef LIBXML_ICU_ENABLED
1733
    xmlCharEncodingHandlerPtr encu;
1734
    uconv_t *ucv_in, *ucv_out;
1735
#endif /* LIBXML_ICU_ENABLED */
1736
43.1k
    char upper[100];
1737
43.1k
    int i;
1738
1739
43.1k
    if (name == NULL) return(NULL);
1740
43.1k
    if (name[0] == 0) return(NULL);
1741
1742
    /*
1743
     * Do the alias resolution
1744
     */
1745
43.1k
    norig = name;
1746
43.1k
    nalias = xmlGetEncodingAlias(name);
1747
43.1k
    if (nalias != NULL)
1748
0
  name = nalias;
1749
1750
    /*
1751
     * Check first for directly registered encoding names
1752
     */
1753
411k
    for (i = 0;i < 99;i++) {
1754
411k
        upper[i] = toupper(name[i]);
1755
411k
  if (upper[i] == 0) break;
1756
411k
    }
1757
43.1k
    upper[i] = 0;
1758
1759
277k
    for (i = 0; i < (int) NUM_DEFAULT_HANDLERS; i++) {
1760
260k
        if (strcmp(upper, defaultHandlers[i].name) == 0)
1761
26.5k
            return((xmlCharEncodingHandlerPtr) &defaultHandlers[i]);
1762
260k
    }
1763
1764
16.6k
    if (handlers != NULL) {
1765
0
        for (i = 0;i < nbCharEncodingHandler; i++) {
1766
0
            if (!strcmp(upper, handlers[i]->name)) {
1767
#ifdef DEBUG_ENCODING
1768
                xmlGenericError(xmlGenericErrorContext,
1769
                        "Found registered handler for encoding %s\n", name);
1770
#endif
1771
0
                return(handlers[i]);
1772
0
            }
1773
0
        }
1774
0
    }
1775
1776
16.6k
#ifdef LIBXML_ICONV_ENABLED
1777
    /* check whether iconv can handle this */
1778
16.6k
    icv_in = iconv_open("UTF-8", name);
1779
16.6k
    icv_out = iconv_open(name, "UTF-8");
1780
16.6k
    if (icv_in == (iconv_t) -1) {
1781
8.02k
        icv_in = iconv_open("UTF-8", upper);
1782
8.02k
    }
1783
16.6k
    if (icv_out == (iconv_t) -1) {
1784
8.02k
  icv_out = iconv_open(upper, "UTF-8");
1785
8.02k
    }
1786
16.6k
    if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1787
8.60k
      enc = (xmlCharEncodingHandlerPtr)
1788
8.60k
            xmlMalloc(sizeof(xmlCharEncodingHandler));
1789
8.60k
      if (enc == NULL) {
1790
0
          iconv_close(icv_in);
1791
0
          iconv_close(icv_out);
1792
0
    return(NULL);
1793
0
      }
1794
8.60k
            memset(enc, 0, sizeof(xmlCharEncodingHandler));
1795
8.60k
      enc->name = xmlMemStrdup(name);
1796
8.60k
      enc->input = NULL;
1797
8.60k
      enc->output = NULL;
1798
8.60k
      enc->iconv_in = icv_in;
1799
8.60k
      enc->iconv_out = icv_out;
1800
#ifdef DEBUG_ENCODING
1801
            xmlGenericError(xmlGenericErrorContext,
1802
        "Found iconv handler for encoding %s\n", name);
1803
#endif
1804
8.60k
      return enc;
1805
8.60k
    } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1806
0
      xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1807
0
        "iconv : problems with filters for '%s'\n", name);
1808
0
      if (icv_in != (iconv_t) -1)
1809
0
    iconv_close(icv_in);
1810
0
      else
1811
0
    iconv_close(icv_out);
1812
0
    }
1813
8.02k
#endif /* LIBXML_ICONV_ENABLED */
1814
#ifdef LIBXML_ICU_ENABLED
1815
    /* check whether icu can handle this */
1816
    ucv_in = openIcuConverter(name, 1);
1817
    ucv_out = openIcuConverter(name, 0);
1818
    if (ucv_in != NULL && ucv_out != NULL) {
1819
      encu = (xmlCharEncodingHandlerPtr)
1820
             xmlMalloc(sizeof(xmlCharEncodingHandler));
1821
      if (encu == NULL) {
1822
                closeIcuConverter(ucv_in);
1823
                closeIcuConverter(ucv_out);
1824
    return(NULL);
1825
      }
1826
            memset(encu, 0, sizeof(xmlCharEncodingHandler));
1827
      encu->name = xmlMemStrdup(name);
1828
      encu->input = NULL;
1829
      encu->output = NULL;
1830
      encu->uconv_in = ucv_in;
1831
      encu->uconv_out = ucv_out;
1832
#ifdef DEBUG_ENCODING
1833
            xmlGenericError(xmlGenericErrorContext,
1834
        "Found ICU converter handler for encoding %s\n", name);
1835
#endif
1836
      return encu;
1837
    } else if (ucv_in != NULL || ucv_out != NULL) {
1838
            closeIcuConverter(ucv_in);
1839
            closeIcuConverter(ucv_out);
1840
      xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1841
        "ICU converter : problems with filters for '%s'\n", name);
1842
    }
1843
#endif /* LIBXML_ICU_ENABLED */
1844
1845
#ifdef DEBUG_ENCODING
1846
    xmlGenericError(xmlGenericErrorContext,
1847
      "No handler found for encoding %s\n", name);
1848
#endif
1849
1850
    /*
1851
     * Fallback using the canonical names
1852
     */
1853
8.02k
    alias = xmlParseCharEncoding(norig);
1854
8.02k
    if (alias != XML_CHAR_ENCODING_ERROR) {
1855
507
        const char* canon;
1856
507
        canon = xmlGetCharEncodingName(alias);
1857
507
        if ((canon != NULL) && (strcmp(name, canon))) {
1858
45
      return(xmlFindCharEncodingHandler(canon));
1859
45
        }
1860
507
    }
1861
1862
    /* If "none of the above", give up */
1863
7.97k
    return(NULL);
1864
8.02k
}
1865
1866
/************************************************************************
1867
 *                  *
1868
 *    ICONV based generic conversion functions    *
1869
 *                  *
1870
 ************************************************************************/
1871
1872
#ifdef LIBXML_ICONV_ENABLED
1873
/**
 * xmlIconvWrapper:
 * @cd:  iconv converter data structure
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of input bytes
 * @inlen:  the length of @in
 *
 * Runs one iconv() call over the given buffers and maps its outcome to
 * the return codes used by the callers in this file.
 *
 * Returns 0 if success, or
 *     -1 by lack of space (also when @out, @outlen, @in or @inlen is NULL), or
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
 *        the result of transformation can't fit into the encoding we want), or
 *     -3 if the last bytes of input can't form a complete character, or
 *        for any other iconv failure.
 *
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 * When an argument is NULL, @outlen (if non-NULL) is set to 0 and @inlen
 * is left untouched.
 */
static int
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    size_t icv_inlen, icv_outlen;
    const char *icv_in = (const char *) in;
    char *icv_out = (char *) out;
    size_t ret;
    int err;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
        if (outlen != NULL) *outlen = 0;
        return(-1);
    }
    icv_inlen = *inlen;
    icv_outlen = *outlen;
    /*
     * Some versions take const, other versions take non-const input.
     */
    ret = iconv(cd, (void *) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
    /*
     * errno is only meaningful when iconv() reported a failure; capture
     * it right away so that a stale value is never interpreted.
     */
    err = (ret == (size_t) -1) ? errno : 0;

    /* iconv() leaves the remaining byte counts; turn them into used counts */
    *inlen -= icv_inlen;
    *outlen -= icv_outlen;

    if (ret == (size_t) -1) {
#ifdef EILSEQ
        if (err == EILSEQ)
            return -2;
#endif
#ifdef E2BIG
        if (err == E2BIG)
            return -1;
#endif
        /* EINVAL (truncated input) and anything else */
        return -3;
    }
    if (icv_inlen != 0) {
        /* no failure reported but input left over: treat as incomplete */
        return -3;
    }
    return 0;
}
1933
#endif /* LIBXML_ICONV_ENABLED */
1934
1935
/************************************************************************
1936
 *                  *
1937
 *    ICU based generic conversion functions    *
1938
 *                  *
1939
 ************************************************************************/
1940
1941
#ifdef LIBXML_ICU_ENABLED
1942
/**
1943
 * xmlUconvWrapper:
1944
 * @cd: ICU uconverter data structure
1945
 * @toUnicode : non-zero if toUnicode. 0 otherwise.
1946
 * @out:  a pointer to an array of bytes to store the result
1947
 * @outlen:  the length of @out
1948
 * @in:  a pointer to an array of input bytes
1949
 * @inlen:  the length of @in
1950
 * @flush: if true, indicates end of input
1951
 *
1952
 * Returns 0 if success, or
1953
 *     -1 by lack of space, or
1954
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
1955
 *        the result of transformation can't fit into the encoding we want), or
1956
 *     -3 if there the last byte can't form a single output char.
1957
 *
1958
 * The value of @inlen after return is the number of octets consumed
1959
 *     as the return value is positive, else unpredictable.
1960
 * The value of @outlen after return is the number of octets produced.
1961
 */
1962
static int
1963
xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
1964
                const unsigned char *in, int *inlen, int flush) {
1965
    const char *ucv_in = (const char *) in;
1966
    char *ucv_out = (char *) out;
1967
    UErrorCode err = U_ZERO_ERROR;
1968
1969
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1970
        if (outlen != NULL) *outlen = 0;
1971
        return(-1);
1972
    }
1973
1974
    if (toUnicode) {
1975
        /* encoding => UTF-16 => UTF-8 */
1976
        ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
1977
                       &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1978
                       &cd->pivot_source, &cd->pivot_target,
1979
                       cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1980
    } else {
1981
        /* UTF-8 => UTF-16 => encoding */
1982
        ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
1983
                       &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1984
                       &cd->pivot_source, &cd->pivot_target,
1985
                       cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1986
    }
1987
    *inlen = ucv_in - (const char*) in;
1988
    *outlen = ucv_out - (char *) out;
1989
    if (U_SUCCESS(err)) {
1990
        /* reset pivot buf if this is the last call for input (flush==TRUE) */
1991
        if (flush)
1992
            cd->pivot_source = cd->pivot_target = cd->pivot_buf;
1993
        return 0;
1994
    }
1995
    if (err == U_BUFFER_OVERFLOW_ERROR)
1996
        return -1;
1997
    if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
1998
        return -2;
1999
    return -3;
2000
}
2001
#endif /* LIBXML_ICU_ENABLED */
2002
2003
/************************************************************************
2004
 *                  *
2005
 *    The real API used by libxml for on-the-fly conversion *
2006
 *                  *
2007
 ************************************************************************/
2008
2009
/**
2010
 * xmlEncInputChunk:
2011
 * @handler:  encoding handler
2012
 * @out:  a pointer to an array of bytes to store the result
2013
 * @outlen:  the length of @out
2014
 * @in:  a pointer to an array of input bytes
2015
 * @inlen:  the length of @in
2016
 * @flush:  flush (ICU-related)
2017
 *
2018
 * Returns 0 if success, or
2019
 *     -1 by lack of space, or
2020
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2021
 *        the result of transformation can't fit into the encoding we want), or
2022
 *     -3 if there the last byte can't form a single output char.
2023
 *
2024
 * The value of @inlen after return is the number of octets consumed
2025
 *     as the return value is 0, else unpredictable.
2026
 * The value of @outlen after return is the number of octets produced.
2027
 */
2028
static int
2029
xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2030
802k
                 int *outlen, const unsigned char *in, int *inlen, int flush) {
2031
802k
    int ret;
2032
802k
    (void)flush;
2033
2034
802k
    if (handler->input != NULL) {
2035
745k
        ret = handler->input(out, outlen, in, inlen);
2036
745k
        if (ret > 0)
2037
543k
           ret = 0;
2038
745k
    }
2039
57.2k
#ifdef LIBXML_ICONV_ENABLED
2040
57.2k
    else if (handler->iconv_in != NULL) {
2041
56.9k
        ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
2042
56.9k
    }
2043
303
#endif /* LIBXML_ICONV_ENABLED */
2044
#ifdef LIBXML_ICU_ENABLED
2045
    else if (handler->uconv_in != NULL) {
2046
        ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen,
2047
                              flush);
2048
    }
2049
#endif /* LIBXML_ICU_ENABLED */
2050
303
    else {
2051
303
        *outlen = 0;
2052
303
        *inlen = 0;
2053
303
        ret = -2;
2054
303
    }
2055
2056
802k
    return(ret);
2057
802k
}
2058
2059
/**
2060
 * xmlEncOutputChunk:
2061
 * @handler:  encoding handler
2062
 * @out:  a pointer to an array of bytes to store the result
2063
 * @outlen:  the length of @out
2064
 * @in:  a pointer to an array of input bytes
2065
 * @inlen:  the length of @in
2066
 *
2067
 * Returns 0 if success, or
2068
 *     -1 by lack of space, or
2069
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2070
 *        the result of transformation can't fit into the encoding we want), or
2071
 *     -3 if there the last byte can't form a single output char.
2072
 *     -4 if no output function was found.
2073
 *
2074
 * The value of @inlen after return is the number of octets consumed
2075
 *     as the return value is 0, else unpredictable.
2076
 * The value of @outlen after return is the number of octets produced.
2077
 */
2078
static int
2079
xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2080
100k
                  int *outlen, const unsigned char *in, int *inlen) {
2081
100k
    int ret;
2082
2083
100k
    if (handler->output != NULL) {
2084
96.7k
        ret = handler->output(out, outlen, in, inlen);
2085
96.7k
        if (ret > 0)
2086
27.3k
           ret = 0;
2087
96.7k
    }
2088
4.17k
#ifdef LIBXML_ICONV_ENABLED
2089
4.17k
    else if (handler->iconv_out != NULL) {
2090
4.17k
        ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
2091
4.17k
    }
2092
0
#endif /* LIBXML_ICONV_ENABLED */
2093
#ifdef LIBXML_ICU_ENABLED
2094
    else if (handler->uconv_out != NULL) {
2095
        ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen,
2096
                              1);
2097
    }
2098
#endif /* LIBXML_ICU_ENABLED */
2099
0
    else {
2100
0
        *outlen = 0;
2101
0
        *inlen = 0;
2102
0
        ret = -4;
2103
0
    }
2104
2105
100k
    return(ret);
2106
100k
}
2107
2108
/**
2109
 * xmlCharEncFirstLine:
2110
 * @handler:  char encoding transformation data structure
2111
 * @out:  an xmlBuffer for the output.
2112
 * @in:  an xmlBuffer for the input
2113
 *
2114
 * Front-end for the encoding handler input function, but handle only
2115
 * the very first line, i.e. limit itself to 45 chars.
2116
 *
2117
 * Returns the number of byte written if success, or
2118
 *     -1 general error
2119
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2120
 *        the result of transformation can't fit into the encoding we want), or
2121
 */
2122
int
2123
xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2124
0
                    xmlBufferPtr in) {
2125
0
    int ret;
2126
0
    int written;
2127
0
    int toconv;
2128
2129
0
    if (handler == NULL) return(-1);
2130
0
    if (out == NULL) return(-1);
2131
0
    if (in == NULL) return(-1);
2132
2133
    /* calculate space available */
2134
0
    written = out->size - out->use - 1; /* count '\0' */
2135
0
    toconv = in->use;
2136
    /*
2137
     * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2138
     * 45 chars should be sufficient to reach the end of the encoding
2139
     * declaration without going too far inside the document content.
2140
     * on UTF-16 this means 90bytes, on UCS4 this means 180
2141
     * The actual value depending on guessed encoding is passed as @len
2142
     * if provided
2143
     */
2144
0
    if (toconv > 180)
2145
0
        toconv = 180;
2146
0
    if (toconv * 2 >= written) {
2147
0
        xmlBufferGrow(out, toconv * 2);
2148
0
  written = out->size - out->use - 1;
2149
0
    }
2150
2151
0
    ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2152
0
                           in->content, &toconv, 0);
2153
0
    xmlBufferShrink(in, toconv);
2154
0
    out->use += written;
2155
0
    out->content[out->use] = 0;
2156
0
    if (ret == -1) ret = -3;
2157
2158
#ifdef DEBUG_ENCODING
2159
    switch (ret) {
2160
        case 0:
2161
      xmlGenericError(xmlGenericErrorContext,
2162
        "converted %d bytes to %d bytes of input\n",
2163
              toconv, written);
2164
      break;
2165
        case -1:
2166
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2167
              toconv, written, in->use);
2168
      break;
2169
        case -2:
2170
      xmlGenericError(xmlGenericErrorContext,
2171
        "input conversion failed due to input error\n");
2172
      break;
2173
        case -3:
2174
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2175
              toconv, written, in->use);
2176
      break;
2177
  default:
2178
      xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2179
    }
2180
#endif /* DEBUG_ENCODING */
2181
    /*
2182
     * Ignore when input buffer is not on a boundary
2183
     */
2184
0
    if (ret == -3) ret = 0;
2185
0
    if (ret == -1) ret = 0;
2186
0
    return(written ? written : ret);
2187
0
}
2188
2189
/**
2190
 * xmlCharEncFirstLineInput:
2191
 * @input: a parser input buffer
2192
 * @len:  number of bytes to convert for the first line, or -1
2193
 *
2194
 * Front-end for the encoding handler input function, but handle only
2195
 * the very first line. Point is that this is based on autodetection
2196
 * of the encoding and once that first line is converted we may find
2197
 * out that a different decoder is needed to process the input.
2198
 *
2199
 * Returns the number of byte written if success, or
2200
 *     -1 general error
2201
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2202
 *        the result of transformation can't fit into the encoding we want), or
2203
 */
2204
int
2205
xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
2206
32.2k
{
2207
32.2k
    int ret;
2208
32.2k
    size_t written;
2209
32.2k
    size_t toconv;
2210
32.2k
    int c_in;
2211
32.2k
    int c_out;
2212
32.2k
    xmlBufPtr in;
2213
32.2k
    xmlBufPtr out;
2214
2215
32.2k
    if ((input == NULL) || (input->encoder == NULL) ||
2216
32.2k
        (input->buffer == NULL) || (input->raw == NULL))
2217
0
        return (-1);
2218
32.2k
    out = input->buffer;
2219
32.2k
    in = input->raw;
2220
2221
32.2k
    toconv = xmlBufUse(in);
2222
32.2k
    if (toconv == 0)
2223
273
        return (0);
2224
31.9k
    written = xmlBufAvail(out);
2225
    /*
2226
     * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2227
     * 45 chars should be sufficient to reach the end of the encoding
2228
     * declaration without going too far inside the document content.
2229
     * on UTF-16 this means 90bytes, on UCS4 this means 180
2230
     * The actual value depending on guessed encoding is passed as @len
2231
     * if provided
2232
     */
2233
31.9k
    if (len >= 0) {
2234
8.28k
        if (toconv > (unsigned int) len)
2235
6.37k
            toconv = len;
2236
23.6k
    } else {
2237
23.6k
        if (toconv > 180)
2238
14.8k
            toconv = 180;
2239
23.6k
    }
2240
31.9k
    if (toconv * 2 >= written) {
2241
0
        xmlBufGrow(out, toconv * 2);
2242
0
        written = xmlBufAvail(out);
2243
0
    }
2244
31.9k
    if (written > 360)
2245
31.9k
        written = 360;
2246
2247
31.9k
    c_in = toconv;
2248
31.9k
    c_out = written;
2249
31.9k
    ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2250
31.9k
                           xmlBufContent(in), &c_in, 0);
2251
31.9k
    xmlBufShrink(in, c_in);
2252
31.9k
    xmlBufAddLen(out, c_out);
2253
31.9k
    if (ret == -1)
2254
1.94k
        ret = -3;
2255
2256
31.9k
    switch (ret) {
2257
26.7k
        case 0:
2258
#ifdef DEBUG_ENCODING
2259
            xmlGenericError(xmlGenericErrorContext,
2260
                            "converted %d bytes to %d bytes of input\n",
2261
                            c_in, c_out);
2262
#endif
2263
26.7k
            break;
2264
0
        case -1:
2265
#ifdef DEBUG_ENCODING
2266
            xmlGenericError(xmlGenericErrorContext,
2267
                         "converted %d bytes to %d bytes of input, %d left\n",
2268
                            c_in, c_out, (int)xmlBufUse(in));
2269
#endif
2270
0
            break;
2271
2.13k
        case -3:
2272
#ifdef DEBUG_ENCODING
2273
            xmlGenericError(xmlGenericErrorContext,
2274
                        "converted %d bytes to %d bytes of input, %d left\n",
2275
                            c_in, c_out, (int)xmlBufUse(in));
2276
#endif
2277
2.13k
            break;
2278
3.05k
        case -2: {
2279
3.05k
            char buf[50];
2280
3.05k
            const xmlChar *content = xmlBufContent(in);
2281
2282
3.05k
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2283
3.05k
         content[0], content[1],
2284
3.05k
         content[2], content[3]);
2285
3.05k
      buf[49] = 0;
2286
3.05k
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2287
3.05k
        "input conversion failed due to input error, bytes %s\n",
2288
3.05k
               buf);
2289
3.05k
        }
2290
31.9k
    }
2291
    /*
2292
     * Ignore when input buffer is not on a boundary
2293
     */
2294
31.9k
    if (ret == -3) ret = 0;
2295
31.9k
    if (ret == -1) ret = 0;
2296
31.9k
    return(c_out ? c_out : ret);
2297
31.9k
}
2298
2299
/**
2300
 * xmlCharEncInput:
2301
 * @input: a parser input buffer
2302
 * @flush: try to flush all the raw buffer
2303
 *
2304
 * Generic front-end for the encoding handler on parser input
2305
 *
2306
 * Returns the number of byte written if success, or
2307
 *     -1 general error
2308
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2309
 *        the result of transformation can't fit into the encoding we want), or
2310
 */
2311
int
2312
xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
2313
1.00M
{
2314
1.00M
    int ret;
2315
1.00M
    size_t written;
2316
1.00M
    size_t toconv;
2317
1.00M
    int c_in;
2318
1.00M
    int c_out;
2319
1.00M
    xmlBufPtr in;
2320
1.00M
    xmlBufPtr out;
2321
2322
1.00M
    if ((input == NULL) || (input->encoder == NULL) ||
2323
1.00M
        (input->buffer == NULL) || (input->raw == NULL))
2324
0
        return (-1);
2325
1.00M
    out = input->buffer;
2326
1.00M
    in = input->raw;
2327
2328
1.00M
    toconv = xmlBufUse(in);
2329
1.00M
    if (toconv == 0)
2330
234k
        return (0);
2331
770k
    if ((toconv > 64 * 1024) && (flush == 0))
2332
7
        toconv = 64 * 1024;
2333
770k
    written = xmlBufAvail(out);
2334
770k
    if (toconv * 2 >= written) {
2335
45.2k
        if (xmlBufGrow(out, toconv * 2) < 0)
2336
0
            return (-1);
2337
45.2k
        written = xmlBufAvail(out);
2338
45.2k
    }
2339
770k
    if ((written > 128 * 1024) && (flush == 0))
2340
8
        written = 128 * 1024;
2341
2342
770k
    c_in = toconv;
2343
770k
    c_out = written;
2344
770k
    ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2345
770k
                           xmlBufContent(in), &c_in, flush);
2346
770k
    xmlBufShrink(in, c_in);
2347
770k
    xmlBufAddLen(out, c_out);
2348
770k
    if (ret == -1)
2349
168k
        ret = -3;
2350
2351
770k
    switch (ret) {
2352
593k
        case 0:
2353
#ifdef DEBUG_ENCODING
2354
            xmlGenericError(xmlGenericErrorContext,
2355
                            "converted %d bytes to %d bytes of input\n",
2356
                            c_in, c_out);
2357
#endif
2358
593k
            break;
2359
0
        case -1:
2360
#ifdef DEBUG_ENCODING
2361
            xmlGenericError(xmlGenericErrorContext,
2362
                         "converted %d bytes to %d bytes of input, %d left\n",
2363
                            c_in, c_out, (int)xmlBufUse(in));
2364
#endif
2365
0
            break;
2366
171k
        case -3:
2367
#ifdef DEBUG_ENCODING
2368
            xmlGenericError(xmlGenericErrorContext,
2369
                        "converted %d bytes to %d bytes of input, %d left\n",
2370
                            c_in, c_out, (int)xmlBufUse(in));
2371
#endif
2372
171k
            break;
2373
5.58k
        case -2: {
2374
5.58k
            char buf[50];
2375
5.58k
            const xmlChar *content = xmlBufContent(in);
2376
2377
5.58k
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2378
5.58k
         content[0], content[1],
2379
5.58k
         content[2], content[3]);
2380
5.58k
      buf[49] = 0;
2381
5.58k
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2382
5.58k
        "input conversion failed due to input error, bytes %s\n",
2383
5.58k
               buf);
2384
5.58k
        }
2385
770k
    }
2386
    /*
2387
     * Ignore when input buffer is not on a boundary
2388
     */
2389
770k
    if (ret == -3)
2390
171k
        ret = 0;
2391
770k
    return (c_out? c_out : ret);
2392
770k
}
2393
2394
/**
2395
 * xmlCharEncInFunc:
2396
 * @handler:  char encoding transformation data structure
2397
 * @out:  an xmlBuffer for the output.
2398
 * @in:  an xmlBuffer for the input
2399
 *
2400
 * Generic front-end for the encoding handler input function
2401
 *
2402
 * Returns the number of byte written if success, or
2403
 *     -1 general error
2404
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2405
 *        the result of transformation can't fit into the encoding we want), or
2406
 */
2407
int
2408
xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2409
                 xmlBufferPtr in)
2410
0
{
2411
0
    int ret;
2412
0
    int written;
2413
0
    int toconv;
2414
2415
0
    if (handler == NULL)
2416
0
        return (-1);
2417
0
    if (out == NULL)
2418
0
        return (-1);
2419
0
    if (in == NULL)
2420
0
        return (-1);
2421
2422
0
    toconv = in->use;
2423
0
    if (toconv == 0)
2424
0
        return (0);
2425
0
    written = out->size - out->use -1; /* count '\0' */
2426
0
    if (toconv * 2 >= written) {
2427
0
        xmlBufferGrow(out, out->size + toconv * 2);
2428
0
        written = out->size - out->use - 1;
2429
0
    }
2430
0
    ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2431
0
                           in->content, &toconv, 1);
2432
0
    xmlBufferShrink(in, toconv);
2433
0
    out->use += written;
2434
0
    out->content[out->use] = 0;
2435
0
    if (ret == -1)
2436
0
        ret = -3;
2437
2438
0
    switch (ret) {
2439
0
        case 0:
2440
#ifdef DEBUG_ENCODING
2441
            xmlGenericError(xmlGenericErrorContext,
2442
                            "converted %d bytes to %d bytes of input\n",
2443
                            toconv, written);
2444
#endif
2445
0
            break;
2446
0
        case -1:
2447
#ifdef DEBUG_ENCODING
2448
            xmlGenericError(xmlGenericErrorContext,
2449
                         "converted %d bytes to %d bytes of input, %d left\n",
2450
                            toconv, written, in->use);
2451
#endif
2452
0
            break;
2453
0
        case -3:
2454
#ifdef DEBUG_ENCODING
2455
            xmlGenericError(xmlGenericErrorContext,
2456
                        "converted %d bytes to %d bytes of input, %d left\n",
2457
                            toconv, written, in->use);
2458
#endif
2459
0
            break;
2460
0
        case -2: {
2461
0
            char buf[50];
2462
2463
0
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2464
0
         in->content[0], in->content[1],
2465
0
         in->content[2], in->content[3]);
2466
0
      buf[49] = 0;
2467
0
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2468
0
        "input conversion failed due to input error, bytes %s\n",
2469
0
               buf);
2470
0
        }
2471
0
    }
2472
    /*
2473
     * Ignore when input buffer is not on a boundary
2474
     */
2475
0
    if (ret == -3)
2476
0
        ret = 0;
2477
0
    return (written? written : ret);
2478
0
}
2479
2480
#ifdef LIBXML_OUTPUT_ENABLED
2481
/**
 * xmlCharEncOutput:
 * @output: a parser output buffer
 * @init: is this an initialization call without data
 *
 * Generic front-end for the encoding handler on parser output
 * a first call with @init == 1 has to be made first to initiate the
 * output in case of non-stateless encoding needing to initiate their
 * state or the output (like the BOM in UTF16).
 * In case of UTF8 sequence conversion errors for the given encoder,
 * the content will be automatically remapped to a CharRef sequence.
 *
 * Data is taken from @output->buffer and the encoded result is appended
 * to @output->conv. One pass converts at most 64 KiB of input and writes
 * at most 256 KiB; the function loops back to "retry" after a partial
 * write or after substituting a character reference, accumulating the
 * byte count over all passes.
 *
 * Returns the number of byte written if success, or
 *     -1 general error
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
 *        the result of transformation can't fit into the encoding we want), or
 */
int
xmlCharEncOutput(xmlOutputBufferPtr output, int init)
{
    int ret;
    size_t written;
    int writtentot = 0;     /* bytes produced over all retry passes */
    size_t toconv;
    int c_in;
    int c_out;
    xmlBufPtr in;
    xmlBufPtr out;

    if ((output == NULL) || (output->encoder == NULL) ||
        (output->buffer == NULL) || (output->conv == NULL))
        return (-1);
    out = output->conv;
    in = output->buffer;

retry:

    written = xmlBufAvail(out);

    /*
     * First specific handling of the initialization call
     */
    if (init) {
        /* the encoder is invoked with a NULL input and a zero length */
        c_in = 0;
        c_out = written;
        /* TODO: Check return value. */
        xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
                          NULL, &c_in);
        xmlBufAddLen(out, c_out);
#ifdef DEBUG_ENCODING
        xmlGenericError(xmlGenericErrorContext,
                "initialized encoder\n");
#endif
        return(c_out);
    }

    /*
     * Conversion itself.
     */
    toconv = xmlBufUse(in);
    if (toconv == 0)
        return (writtentot);
    if (toconv > 64 * 1024)
        toconv = 64 * 1024;
    /* ask for four output bytes per input byte */
    if (toconv * 4 >= written) {
        /* NOTE(review): the result of xmlBufGrow is not checked here */
        xmlBufGrow(out, toconv * 4);
        written = xmlBufAvail(out);
    }
    if (written > 256 * 1024)
        written = 256 * 1024;

    c_in = toconv;
    c_out = written;
    ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
                            xmlBufContent(in), &c_in);
    /* commit what was consumed and produced, whatever the outcome */
    xmlBufShrink(in, c_in);
    xmlBufAddLen(out, c_out);
    writtentot += c_out;
    if (ret == -1) {
        if (c_out > 0) {
            /* Can be a limitation of iconv or uconv */
            goto retry;
        }
        /* no progress at all: handled like a partial conversion */
        ret = -3;
    }

    /*
     * Attempt to handle error cases
     */
    switch (ret) {
        case 0:
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
                    "converted %d bytes to %d bytes of output\n",
                    c_in, c_out);
#endif
            break;
        case -1:
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
                    "output conversion failed by lack of space\n");
#endif
            break;
        case -3:
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
                    c_in, c_out, (int) xmlBufUse(in));
#endif
            break;
        case -4:
            /* the handler has no output converter: reported as -1 */
            xmlEncodingErr(XML_I18N_NO_OUTPUT,
                           "xmlCharEncOutFunc: no output function !\n", NULL);
            ret = -1;
            break;
        case -2: {
            xmlChar charref[20];
            int len = xmlBufUse(in);
            xmlChar *content = xmlBufContent(in);
            int cur, charrefLen;

            /* decode the character the encoder could not represent */
            cur = xmlGetUTF8Char(content, &len);
            if (cur <= 0)
                break;

#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
                    "handling output conversion error\n");
            xmlGenericError(xmlGenericErrorContext,
                    "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
                    content[0], content[1],
                    content[2], content[3]);
#endif
            /*
             * Removes the UTF8 sequence, and replace it by a charref
             * and continue the transcoding phase, hoping the error
             * did not mangle the encoder state.
             */
            charrefLen = snprintf((char *) &charref[0], sizeof(charref),
                             "&#%d;", cur);
            xmlBufShrink(in, len);
            xmlBufGrow(out, charrefLen * 4);
            c_out = xmlBufAvail(out);
            c_in = charrefLen;
            ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
                                    charref, &c_in);

            if ((ret < 0) || (c_in != charrefLen)) {
                char buf[50];

                /*
                 * NOTE(review): content was fetched before the
                 * xmlBufShrink() call above, so the reads and the write
                 * below go through the pre-shrink pointer; four bytes are
                 * read without checking how many are available. Confirm
                 * this is still intended.
                 */
                snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
                         content[0], content[1],
                         content[2], content[3]);
                buf[49] = 0;
                xmlEncodingErr(XML_I18N_CONV_FAILED,
                    "output conversion failed due to conv error, bytes %s\n",
                               buf);
                content[0] = ' ';
                break;
            }

            /* charref emitted: account for it and convert the rest */
            xmlBufAddLen(out, c_out);
            writtentot += c_out;
            goto retry;
        }
    }
    /* bytes written take precedence over the status code */
    return(writtentot ? writtentot : ret);
}
2648
#endif
2649
2650
/**
2651
 * xmlCharEncOutFunc:
2652
 * @handler:  char encoding transformation data structure
2653
 * @out:  an xmlBuffer for the output.
2654
 * @in:  an xmlBuffer for the input
2655
 *
2656
 * Generic front-end for the encoding handler output function
2657
 * a first call with @in == NULL has to be made firs to initiate the
2658
 * output in case of non-stateless encoding needing to initiate their
2659
 * state or the output (like the BOM in UTF16).
2660
 * In case of UTF8 sequence conversion errors for the given encoder,
2661
 * the content will be automatically remapped to a CharRef sequence.
2662
 *
2663
 * Returns the number of byte written if success, or
2664
 *     -1 general error
2665
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2666
 *        the result of transformation can't fit into the encoding we want), or
2667
 */
2668
int
2669
xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2670
0
                  xmlBufferPtr in) {
2671
0
    int ret;
2672
0
    int written;
2673
0
    int writtentot = 0;
2674
0
    int toconv;
2675
2676
0
    if (handler == NULL) return(-1);
2677
0
    if (out == NULL) return(-1);
2678
2679
0
retry:
2680
2681
0
    written = out->size - out->use;
2682
2683
0
    if (written > 0)
2684
0
  written--; /* Gennady: count '/0' */
2685
2686
    /*
2687
     * First specific handling of in = NULL, i.e. the initialization call
2688
     */
2689
0
    if (in == NULL) {
2690
0
        toconv = 0;
2691
        /* TODO: Check return value. */
2692
0
        xmlEncOutputChunk(handler, &out->content[out->use], &written,
2693
0
                          NULL, &toconv);
2694
0
        out->use += written;
2695
0
        out->content[out->use] = 0;
2696
#ifdef DEBUG_ENCODING
2697
  xmlGenericError(xmlGenericErrorContext,
2698
    "initialized encoder\n");
2699
#endif
2700
0
        return(0);
2701
0
    }
2702
2703
    /*
2704
     * Conversion itself.
2705
     */
2706
0
    toconv = in->use;
2707
0
    if (toconv == 0)
2708
0
  return(0);
2709
0
    if (toconv * 4 >= written) {
2710
0
        xmlBufferGrow(out, toconv * 4);
2711
0
  written = out->size - out->use - 1;
2712
0
    }
2713
0
    ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2714
0
                            in->content, &toconv);
2715
0
    xmlBufferShrink(in, toconv);
2716
0
    out->use += written;
2717
0
    writtentot += written;
2718
0
    out->content[out->use] = 0;
2719
0
    if (ret == -1) {
2720
0
        if (written > 0) {
2721
            /* Can be a limitation of iconv or uconv */
2722
0
            goto retry;
2723
0
        }
2724
0
        ret = -3;
2725
0
    }
2726
2727
    /*
2728
     * Attempt to handle error cases
2729
     */
2730
0
    switch (ret) {
2731
0
        case 0:
2732
#ifdef DEBUG_ENCODING
2733
      xmlGenericError(xmlGenericErrorContext,
2734
        "converted %d bytes to %d bytes of output\n",
2735
              toconv, written);
2736
#endif
2737
0
      break;
2738
0
        case -1:
2739
#ifdef DEBUG_ENCODING
2740
      xmlGenericError(xmlGenericErrorContext,
2741
        "output conversion failed by lack of space\n");
2742
#endif
2743
0
      break;
2744
0
        case -3:
2745
#ifdef DEBUG_ENCODING
2746
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2747
              toconv, written, in->use);
2748
#endif
2749
0
      break;
2750
0
        case -4:
2751
0
      xmlEncodingErr(XML_I18N_NO_OUTPUT,
2752
0
               "xmlCharEncOutFunc: no output function !\n", NULL);
2753
0
      ret = -1;
2754
0
            break;
2755
0
        case -2: {
2756
0
      xmlChar charref[20];
2757
0
      int len = in->use;
2758
0
      const xmlChar *utf = (const xmlChar *) in->content;
2759
0
      int cur, charrefLen;
2760
2761
0
      cur = xmlGetUTF8Char(utf, &len);
2762
0
      if (cur <= 0)
2763
0
                break;
2764
2765
#ifdef DEBUG_ENCODING
2766
            xmlGenericError(xmlGenericErrorContext,
2767
                    "handling output conversion error\n");
2768
            xmlGenericError(xmlGenericErrorContext,
2769
                    "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2770
                    in->content[0], in->content[1],
2771
                    in->content[2], in->content[3]);
2772
#endif
2773
            /*
2774
             * Removes the UTF8 sequence, and replace it by a charref
2775
             * and continue the transcoding phase, hoping the error
2776
             * did not mangle the encoder state.
2777
             */
2778
0
            charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2779
0
                             "&#%d;", cur);
2780
0
            xmlBufferShrink(in, len);
2781
0
            xmlBufferGrow(out, charrefLen * 4);
2782
0
      written = out->size - out->use - 1;
2783
0
            toconv = charrefLen;
2784
0
            ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2785
0
                                    charref, &toconv);
2786
2787
0
      if ((ret < 0) || (toconv != charrefLen)) {
2788
0
    char buf[50];
2789
2790
0
    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2791
0
       in->content[0], in->content[1],
2792
0
       in->content[2], in->content[3]);
2793
0
    buf[49] = 0;
2794
0
    xmlEncodingErr(XML_I18N_CONV_FAILED,
2795
0
        "output conversion failed due to conv error, bytes %s\n",
2796
0
             buf);
2797
0
    in->content[0] = ' ';
2798
0
          break;
2799
0
      }
2800
2801
0
            out->use += written;
2802
0
            writtentot += written;
2803
0
            out->content[out->use] = 0;
2804
0
            goto retry;
2805
0
  }
2806
0
    }
2807
0
    return(writtentot ? writtentot : ret);
2808
0
}
2809
2810
/**
2811
 * xmlCharEncCloseFunc:
2812
 * @handler:  char encoding transformation data structure
2813
 *
2814
 * Generic front-end for encoding handler close function
2815
 *
2816
 * Returns 0 if success, or -1 in case of error
2817
 */
2818
int
2819
41.9k
xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2820
41.9k
    int ret = 0;
2821
41.9k
    int tofree = 0;
2822
41.9k
    int i = 0;
2823
2824
41.9k
    if (handler == NULL) return(-1);
2825
41.9k
    if (handler->name == NULL) return(-1);
2826
2827
222k
    for (i = 0; i < (int) NUM_DEFAULT_HANDLERS; i++) {
2828
213k
        if (handler == &defaultHandlers[i])
2829
33.3k
            return(0);
2830
213k
    }
2831
2832
8.60k
    if (handlers != NULL) {
2833
0
        for (i = 0;i < nbCharEncodingHandler; i++) {
2834
0
            if (handler == handlers[i])
2835
0
                return(0);
2836
0
  }
2837
0
    }
2838
8.60k
#ifdef LIBXML_ICONV_ENABLED
2839
    /*
2840
     * Iconv handlers can be used only once, free the whole block.
2841
     * and the associated icon resources.
2842
     */
2843
8.60k
    if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
2844
8.60k
        tofree = 1;
2845
8.60k
  if (handler->iconv_out != NULL) {
2846
8.60k
      if (iconv_close(handler->iconv_out))
2847
0
    ret = -1;
2848
8.60k
      handler->iconv_out = NULL;
2849
8.60k
  }
2850
8.60k
  if (handler->iconv_in != NULL) {
2851
8.60k
      if (iconv_close(handler->iconv_in))
2852
0
    ret = -1;
2853
8.60k
      handler->iconv_in = NULL;
2854
8.60k
  }
2855
8.60k
    }
2856
8.60k
#endif /* LIBXML_ICONV_ENABLED */
2857
#ifdef LIBXML_ICU_ENABLED
2858
    if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) {
2859
        tofree = 1;
2860
  if (handler->uconv_out != NULL) {
2861
      closeIcuConverter(handler->uconv_out);
2862
      handler->uconv_out = NULL;
2863
  }
2864
  if (handler->uconv_in != NULL) {
2865
      closeIcuConverter(handler->uconv_in);
2866
      handler->uconv_in = NULL;
2867
  }
2868
    }
2869
#endif
2870
8.60k
    if (tofree) {
2871
        /* free up only dynamic handlers iconv/uconv */
2872
8.60k
        if (handler->name != NULL)
2873
8.60k
            xmlFree(handler->name);
2874
8.60k
        handler->name = NULL;
2875
8.60k
        xmlFree(handler);
2876
8.60k
    }
2877
#ifdef DEBUG_ENCODING
2878
    if (ret)
2879
        xmlGenericError(xmlGenericErrorContext,
2880
    "failed to close the encoding handler\n");
2881
    else
2882
        xmlGenericError(xmlGenericErrorContext,
2883
    "closed the encoding handler\n");
2884
#endif
2885
2886
8.60k
    return(ret);
2887
8.60k
}
2888
2889
/**
2890
 * xmlByteConsumed:
2891
 * @ctxt: an XML parser context
2892
 *
2893
 * This function provides the current index of the parser relative
2894
 * to the start of the current entity. This function is computed in
2895
 * bytes from the beginning starting at zero and finishing at the
2896
 * size in byte of the file if parsing a file. The function is
2897
 * of constant cost if the input is UTF-8 but can be costly if run
2898
 * on non-UTF-8 input.
2899
 *
2900
 * Returns the index in bytes from the beginning of the entity or -1
2901
 *         in case the index could not be computed.
2902
 */
2903
long
2904
0
xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2905
0
    xmlParserInputPtr in;
2906
2907
0
    if (ctxt == NULL) return(-1);
2908
0
    in = ctxt->input;
2909
0
    if (in == NULL)  return(-1);
2910
0
    if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2911
0
        unsigned int unused = 0;
2912
0
  xmlCharEncodingHandler * handler = in->buf->encoder;
2913
        /*
2914
   * Encoding conversion, compute the number of unused original
2915
   * bytes from the input not consumed and subtract that from
2916
   * the raw consumed value, this is not a cheap operation
2917
   */
2918
0
        if (in->end - in->cur > 0) {
2919
0
      unsigned char convbuf[32000];
2920
0
      const unsigned char *cur = (const unsigned char *)in->cur;
2921
0
      int toconv = in->end - in->cur, written = 32000;
2922
2923
0
      int ret;
2924
2925
0
            do {
2926
0
                toconv = in->end - cur;
2927
0
                written = 32000;
2928
0
                ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2929
0
                                        cur, &toconv);
2930
0
                if (ret < 0) {
2931
0
                    if (written > 0)
2932
0
                        ret = -2;
2933
0
                    else
2934
0
                        return(-1);
2935
0
                }
2936
0
                unused += written;
2937
0
                cur += toconv;
2938
0
            } while (ret == -2);
2939
0
  }
2940
0
  if (in->buf->rawconsumed < unused)
2941
0
      return(-1);
2942
0
  return(in->buf->rawconsumed - unused);
2943
0
    }
2944
0
    return(in->consumed + (in->cur - in->base));
2945
0
}
2946
2947
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2948
#ifdef LIBXML_ISO8859X_ENABLED
2949
2950
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. Conversion stops early, without error, as soon as
 * @out is full.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 * fails (malformed UTF-8 or character not in the target set), -3 if the
 * input ends inside a multi-byte sequence, or -1 on invalid arguments.
 * The value of @inlen after return is the number of octets consumed;
 *     only completely converted characters are counted.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              const unsigned char* const xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;
    int ret;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    outend = out + (*outlen);
    inend = in + (*inlen);
    while (in < inend) {
        unsigned char d;

        /* Every character yields one output byte: never write past @out. */
        if (out >= outend)
            break;

        d = *in++;
        if (d < 0x80) {
            *out++ = d;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            goto done;
        } else if (d < 0xE0) {
            unsigned char c;

            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                ret = -3;
                goto done;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                ret = -2;
                goto done;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            /* lead byte selects a 64-entry block, trail byte the entry */
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                goto done;
            }
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;

            if (inend - in < 2) {
                /* trailing bytes not in input buffer */
                ret = -3;
                goto done;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                ret = -2;
                goto done;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                ret = -2;
                goto done;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            /* same scheme as above with one more level of indirection */
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                        xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                goto done;
            }
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            ret = -2;
            goto done;
        }
        processed = in;
    }
    ret = (int) (out - outstart);

done:
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(ret);
}
3068
3069
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable:  code points for the input bytes 0x80..0xFF, indexed
 *                 by (byte - 0x80); an entry of 0 marks an undefined byte
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. Conversion stops early when @out cannot hold the
 * next character.
 *
 * Returns the number of bytes written if success, or -1 on invalid
 * arguments or when an undefined input byte is met.
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* instop;
    unsigned int c;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    instop = inend;

    /*
     * Main loop: requires room for the longest (3 byte) sequence.
     * Compare distances instead of forming "outend - 2", which would
     * point before the buffer when fewer than 2 bytes are available.
     */
    while ((in < inend) && (outend - out > 2)) {
        if (*in >= 0x80) {
            c = unicodetable [*in - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
            ++in;
        }
        /* Fast path for ASCII runs, limited by the remaining output. */
        if (instop - in > outend - out) instop = in + (outend - out);
        /* Check the bound before dereferencing to avoid an over-read. */
        while ((in < instop) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }
    /* Up to two bytes of output may remain: enough for ASCII only. */
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ =  *in++;
    }
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ =  *in++;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (*outlen);
}
3134
3135
3136
/************************************************************************
3137
 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
3138
 ************************************************************************/
3139
3140
/*
 * ISO-8859-2 to Unicode: entry [b - 0x80] is the code point for input
 * byte b (0x80..0xFF); a value of 0 means the byte is undefined.
 */
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, /* 0x80 */
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, /* 0x88 */
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, /* 0x90 */
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, /* 0x98 */
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7, /* 0xa0 */
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b, /* 0xa8 */
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7, /* 0xb0 */
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c, /* 0xb8 */
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7, /* 0xc0 */
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e, /* 0xc8 */
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7, /* 0xd0 */
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df, /* 0xd8 */
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7, /* 0xe0 */
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f, /* 0xe8 */
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7, /* 0xf0 */
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9, /* 0xf8 */
};
3158
3159
/*
 * Unicode to ISO-8859-2, in the layout read by UTF8ToISO8859x():
 * a 48 byte index followed by 64 byte blocks. The index holds a block
 * number per lead byte; a block is indexed by the low 6 bits of a
 * trailing byte. Block 0 is all zeros, i.e. "not in character set".
 */
static const unsigned char xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
    /* index for 2-byte sequences, by (lead & 0x1F) */
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* index for 3-byte sequences, by (lead & 0x0F) */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 0 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 1 */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    /* block 2 */
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
    /* block 3 */
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 4 */
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
    /* block 5 */
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
};
3188
3189
/*
 * ISO-8859-3 to Unicode: entry [b - 0x80] is the code point for input
 * byte b (0x80..0xFF); a value of 0 means the byte is undefined.
 */
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, /* 0x80 */
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, /* 0x88 */
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, /* 0x90 */
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, /* 0x98 */
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7, /* 0xa0 */
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b, /* 0xa8 */
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7, /* 0xb0 */
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c, /* 0xb8 */
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7, /* 0xc0 */
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, /* 0xc8 */
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7, /* 0xd0 */
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df, /* 0xd8 */
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7, /* 0xe0 */
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, /* 0xe8 */
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7, /* 0xf0 */
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9, /* 0xf8 */
};
3207
3208
/*
 * Unicode to ISO-8859-3, in the layout read by UTF8ToISO8859x():
 * a 48 byte index followed by 64 byte blocks. The index holds a block
 * number per lead byte; a block is indexed by the low 6 bits of a
 * trailing byte. Block 0 is all zeros, i.e. "not in character set".
 */
static const unsigned char xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
    /* index for 2-byte sequences, by (lead & 0x1F) */
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* index for 3-byte sequences, by (lead & 0x0F) */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 0 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 1 */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
    /* block 2 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 3 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 4 */
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 5 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
    /* block 6 */
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
};
3241
3242
/*
 * ISO-8859-4 to Unicode: entry [b - 0x80] is the code point for input
 * byte b (0x80..0xFF); a value of 0 means the byte is undefined.
 */
static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, /* 0x80 */
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, /* 0x88 */
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, /* 0x90 */
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, /* 0x98 */
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7, /* 0xa0 */
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af, /* 0xa8 */
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7, /* 0xb0 */
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b, /* 0xb8 */
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e, /* 0xc0 */
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a, /* 0xc8 */
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7, /* 0xd0 */
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df, /* 0xd8 */
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f, /* 0xe0 */
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b, /* 0xe8 */
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7, /* 0xf0 */
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9, /* 0xf8 */
};
3260
3261
/*
 * Unicode to ISO-8859-4, in the layout read by UTF8ToISO8859x():
 * a 48 byte index followed by 64 byte blocks. The index holds a block
 * number per lead byte; a block is indexed by the low 6 bits of a
 * trailing byte. Block 0 is all zeros, i.e. "not in character set".
 */
static const unsigned char xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
    /* index for 2-byte sequences, by (lead & 0x1F) */
    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* index for 3-byte sequences, by (lead & 0x0F) */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 0 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 1 */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    /* block 2 */
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
    /* block 3 */
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
    /* block 4 */
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 5 */
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
};
3290
3291
/*
 * ISO-8859-5 to Unicode: entry [b - 0x80] is the code point for input
 * byte b (0x80..0xFF); a value of 0 means the byte is undefined.
 */
static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, /* 0x80 */
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, /* 0x88 */
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, /* 0x90 */
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, /* 0x98 */
    0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, /* 0xa0 */
    0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f, /* 0xa8 */
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, /* 0xb0 */
    0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, /* 0xb8 */
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, /* 0xc0 */
    0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, /* 0xc8 */
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, /* 0xd0 */
    0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f, /* 0xd8 */
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, /* 0xe0 */
    0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f, /* 0xe8 */
    0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457, /* 0xf0 */
    0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f, /* 0xf8 */
};
3309
3310
static const unsigned char xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
3311
    "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3312
    "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3313
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3314
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3315
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3316
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3317
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3318
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3319
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3320
    "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
3321
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3322
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
3323
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3324
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3325
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3326
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3327
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
3328
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3329
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3330
    "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3331
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3332
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3333
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3334
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3335
    "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3336
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3337
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3338
};
3339
3340
static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
3341
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3342
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3343
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3344
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3345
    0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
3346
    0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
3347
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3348
    0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
3349
    0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
3350
    0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3351
    0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3352
    0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3353
    0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3354
    0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3355
    0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3356
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3357
};
3358
3359
static const unsigned char xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
3360
    "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3361
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3362
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3363
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3364
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3365
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3366
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3367
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3368
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3369
    "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3370
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3371
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3372
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3373
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3374
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3375
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3376
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3377
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3378
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3379
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3380
    "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3381
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3382
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3383
};
3384
3385
static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
3386
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3387
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3388
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3389
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3390
    0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3391
    0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3392
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3393
    0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3394
    0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3395
    0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3396
    0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3397
    0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3398
    0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3399
    0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3400
    0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3401
    0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3402
};
3403
3404
static const unsigned char xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3405
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3406
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3407
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3408
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3409
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3410
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3411
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3412
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3413
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3414
    "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3415
    "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3416
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3417
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3418
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3419
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3420
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3421
    "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3422
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3423
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3424
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3425
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3426
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3427
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3428
    "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3429
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3430
    "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3431
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3432
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3433
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3434
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3435
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3436
};
3437
3438
static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
3439
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3440
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3441
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3442
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3443
    0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3444
    0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3445
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3446
    0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3447
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3448
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3449
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3450
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3451
    0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3452
    0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3453
    0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3454
    0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3455
};
3456
3457
static const unsigned char xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3458
    "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3459
    "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3460
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3461
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3462
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3463
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3464
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3465
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3466
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3467
    "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3468
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3469
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3470
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3471
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3472
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3473
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3474
    "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3475
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3476
    "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3477
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3478
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3479
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3480
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3481
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3482
    "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3483
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3484
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3485
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3486
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3487
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3488
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3489
};
3490
3491
static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
3492
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3493
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3494
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3495
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3496
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3497
    0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3498
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3499
    0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3500
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3501
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3502
    0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3503
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3504
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3505
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3506
    0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3507
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3508
};
3509
3510
static const unsigned char xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3511
    "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3512
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3513
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3514
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3515
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3516
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3517
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3518
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3519
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3520
    "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3521
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3522
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3523
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3524
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3525
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3526
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3527
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3528
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3529
    "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3530
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3531
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3532
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3533
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3534
};
3535
3536
static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3537
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3538
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3539
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3540
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3541
    0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3542
    0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3543
    0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3544
    0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3545
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3546
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3547
    0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3548
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3549
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3550
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3551
    0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3552
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3553
};
3554
3555
static const unsigned char xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3556
    "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3557
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3558
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3559
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3560
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3561
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3562
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3563
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3564
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3565
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3566
    "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3567
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3568
    "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3569
    "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3570
    "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3571
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3572
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3573
    "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3574
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3575
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3576
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3577
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3578
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3579
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3580
    "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3581
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3582
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3583
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3584
    "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3585
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3586
    "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3587
};
3588
3589
static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3590
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3591
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3592
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3593
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3594
    0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3595
    0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3596
    0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3597
    0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3598
    0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3599
    0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3600
    0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3601
    0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3602
    0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3603
    0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3604
    0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3605
    0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3606
};
3607
3608
static const unsigned char xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3609
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3610
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3611
    "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3612
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3613
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3614
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3615
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3616
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3617
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3618
    "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3619
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3620
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3621
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3622
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3623
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3624
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3625
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3626
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3627
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3628
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3629
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3630
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3631
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3632
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3633
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3634
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3635
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3636
};
3637
3638
static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3639
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3640
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3641
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3642
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3643
    0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3644
    0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3645
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3646
    0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3647
    0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3648
    0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3649
    0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3650
    0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3651
    0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3652
    0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3653
    0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3654
    0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3655
};
3656
3657
static const unsigned char xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3658
    "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3659
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3660
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3661
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3662
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3663
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3664
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3665
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3666
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3667
    "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3668
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3669
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3670
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3671
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3672
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3673
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3674
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3675
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3676
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3677
    "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3678
    "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3679
    "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3680
    "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3681
    "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3682
    "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3683
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3684
    "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3685
    "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3686
    "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3687
    "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3688
    "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3689
};
3690
3691
static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
3692
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3693
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3694
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3695
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3696
    0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3697
    0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3698
    0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3699
    0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3700
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3701
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3702
    0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3703
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3704
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3705
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3706
    0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3707
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
3708
};
3709
3710
static const unsigned char xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
3711
    "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3712
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3713
    "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3714
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3715
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3716
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3717
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3718
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3719
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3720
    "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3721
    "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3722
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3723
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3724
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3725
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3726
    "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3727
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3728
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3729
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3730
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3731
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3732
    "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3733
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3734
    "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3735
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3736
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3737
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3738
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3739
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3740
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3741
    "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3742
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3743
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3744
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3745
    "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3746
    "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3747
    "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3748
    "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3749
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3750
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3751
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3752
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3753
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3754
};
3755
3756
static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
3757
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3758
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3759
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3760
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3761
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3762
    0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3763
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3764
    0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3765
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3766
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3767
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3768
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3769
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3770
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3771
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3772
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
3773
};
3774
3775
/*
 * xmltranscodetable_ISO8859_15:
 *
 * Byte table of 48 + 6 * 64 entries that UTF8ToISO8859_15 hands to
 * UTF8ToISO8859x. The initializer is a run of 16-byte string literals.
 * NOTE(review): the size expression suggests a 48-byte leading index
 * followed by six 64-byte blocks, with 0x00 apparently meaning "no
 * mapping" -- confirm against UTF8ToISO8859x before relying on this.
 */
static const unsigned char xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
3776
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3777
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3778
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3779
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3780
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3781
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3782
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3783
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3784
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3785
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3786
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3787
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3788
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3789
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3790
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3791
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3792
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3793
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3794
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3795
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3796
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3797
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3798
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3799
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3800
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3801
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3802
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
3803
};
3804
3805
/*
 * xmlunicodetable_ISO8859_16:
 *
 * 128-entry table of 16-bit values that ISO8859_16ToUTF8 hands to
 * ISO8859xToUTF8. The first 32 entries equal 0x0080 + index; from the
 * 0x00a0 row onward many entries hold other values (e.g. 0x0104, 0x20ac,
 * 0x0218, 0x021b).
 * NOTE(review): presumably entry i is the Unicode code point for input
 * byte 0x80 + i -- confirm against ISO8859xToUTF8.
 */
static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
3806
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3807
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3808
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3809
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3810
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3811
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3812
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3813
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3814
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3815
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3816
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3817
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3818
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3819
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3820
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3821
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
3822
};
3823
3824
/*
 * xmltranscodetable_ISO8859_16:
 *
 * Byte table of 48 + 9 * 64 entries that UTF8ToISO8859_16 hands to
 * UTF8ToISO8859x. The initializer is a run of 16-byte string literals.
 * NOTE(review): the size expression suggests a 48-byte leading index
 * followed by nine 64-byte blocks, with 0x00 apparently meaning "no
 * mapping" -- confirm against UTF8ToISO8859x before relying on this.
 */
static const unsigned char xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
3825
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3826
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3827
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3828
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3829
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3830
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3831
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3832
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3833
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3834
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3835
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3836
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3837
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3838
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3839
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3840
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3841
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3842
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3843
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3844
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3845
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3846
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3847
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3848
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3849
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3850
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3851
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3852
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3853
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3854
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3855
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3856
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3857
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3858
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3859
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3860
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3861
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3862
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3863
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3864
};
3865
3866
3867
/*
3868
 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3869
 */
3870
3871
static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3872
    const unsigned char* in, int *inlen) {
3873
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3874
}
3875
static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3876
    const unsigned char* in, int *inlen) {
3877
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3878
}
3879
3880
static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3881
    const unsigned char* in, int *inlen) {
3882
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3883
}
3884
static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3885
    const unsigned char* in, int *inlen) {
3886
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3887
}
3888
3889
static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3890
    const unsigned char* in, int *inlen) {
3891
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3892
}
3893
static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3894
    const unsigned char* in, int *inlen) {
3895
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3896
}
3897
3898
static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3899
    const unsigned char* in, int *inlen) {
3900
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3901
}
3902
static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3903
    const unsigned char* in, int *inlen) {
3904
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3905
}
3906
3907
static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3908
    const unsigned char* in, int *inlen) {
3909
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3910
}
3911
static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3912
    const unsigned char* in, int *inlen) {
3913
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3914
}
3915
3916
static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3917
    const unsigned char* in, int *inlen) {
3918
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3919
}
3920
static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3921
    const unsigned char* in, int *inlen) {
3922
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3923
}
3924
3925
static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3926
    const unsigned char* in, int *inlen) {
3927
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3928
}
3929
static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3930
    const unsigned char* in, int *inlen) {
3931
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3932
}
3933
3934
static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3935
    const unsigned char* in, int *inlen) {
3936
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3937
}
3938
static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3939
    const unsigned char* in, int *inlen) {
3940
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3941
}
3942
3943
static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3944
    const unsigned char* in, int *inlen) {
3945
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3946
}
3947
static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3948
    const unsigned char* in, int *inlen) {
3949
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3950
}
3951
3952
static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3953
    const unsigned char* in, int *inlen) {
3954
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3955
}
3956
static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3957
    const unsigned char* in, int *inlen) {
3958
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3959
}
3960
3961
static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3962
    const unsigned char* in, int *inlen) {
3963
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3964
}
3965
static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3966
    const unsigned char* in, int *inlen) {
3967
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3968
}
3969
3970
static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3971
    const unsigned char* in, int *inlen) {
3972
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3973
}
3974
static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3975
    const unsigned char* in, int *inlen) {
3976
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3977
}
3978
3979
static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3980
    const unsigned char* in, int *inlen) {
3981
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3982
}
3983
static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3984
    const unsigned char* in, int *inlen) {
3985
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3986
}
3987
3988
static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3989
    const unsigned char* in, int *inlen) {
3990
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3991
}
3992
static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3993
    const unsigned char* in, int *inlen) {
3994
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3995
}
3996
3997
#endif
3998
#endif
3999