Coverage Report

Created: 2023-09-25 06:03

/src/libxml2-2.11.5/encoding.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * encoding.c : implements the encoding conversion functions needed for XML
3
 *
4
 * Related specs:
5
 * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6
 * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7
 * [ISO-10646]    UTF-8 and UTF-16 in Annexes
8
 * [ISO-8859-1]   ISO Latin-1 characters codes.
9
 * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
10
 *                Worldwide Character Encoding -- Version 1.0", Addison-
11
 *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
12
 *                described in Unicode Technical Report #4.
13
 * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
14
 *                Information Interchange, ANSI X3.4-1986.
15
 *
16
 * See Copyright for the status of this software.
17
 *
18
 * daniel@veillard.com
19
 *
20
 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
21
 */
22
23
#define IN_LIBXML
24
#include "libxml.h"
25
26
#include <string.h>
27
#include <limits.h>
28
#include <ctype.h>
29
#include <stdlib.h>
30
31
#ifdef LIBXML_ICONV_ENABLED
32
#include <errno.h>
33
#endif
34
35
#include <libxml/encoding.h>
36
#include <libxml/xmlmemory.h>
37
#ifdef LIBXML_HTML_ENABLED
38
#include <libxml/HTMLparser.h>
39
#endif
40
#include <libxml/globals.h>
41
#include <libxml/xmlerror.h>
42
43
#include "private/buf.h"
44
#include "private/enc.h"
45
#include "private/error.h"
46
47
#ifdef LIBXML_ICU_ENABLED
48
#include <unicode/ucnv.h>
49
/* Size of pivot buffer, same as icu/source/common/ucnv.cpp CHUNK_SIZE */
50
#define ICU_PIVOT_BUF_SIZE 1024
51
/*
 * State kept for one ICU-based converter: two ICU converter handles
 * plus a UTF-16 pivot buffer.
 */
typedef struct _uconv_t uconv_t;
52
struct _uconv_t {
53
  UConverter *uconv; /* for conversion between an encoding and UTF-16 */
54
  UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */
55
  /* UTF-16 scratch storage of ICU_PIVOT_BUF_SIZE code units */
  UChar      pivot_buf[ICU_PIVOT_BUF_SIZE];
56
  /* cursor into pivot_buf; openIcuConverter() sets it to the buffer start */
  UChar      *pivot_source;
57
  /* cursor into pivot_buf; openIcuConverter() sets it to the buffer start */
  UChar      *pivot_target;
58
};
59
#endif
60
61
/*
 * One entry of the encoding alias table: a pair of strings.
 * NOTE(review): presumably @alias is an alternate spelling that maps to
 * the encoding called @name -- confirm against the alias lookup code.
 */
typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
62
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
63
struct _xmlCharEncodingAlias {
64
    const char *name;
65
    const char *alias;
66
};
67
68
/* Alias table; NULL until something allocates it. */
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
69
/* NOTE(review): presumably the number of entries in use -- confirm. */
static int xmlCharEncodingAliasesNb = 0;
70
/* NOTE(review): presumably the allocated capacity of the table -- confirm. */
static int xmlCharEncodingAliasesMax = 0;
71
72
#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
73
#if 0
74
#define DEBUG_ENCODING  /* Define this to get encoding traces */
75
#endif
76
#else
77
#endif
78
79
/*
 * Host byte-order flag: non-zero means the machine is treated as
 * little-endian. It starts out as 1.
 * NOTE(review): presumably re-detected at library initialisation -- confirm.
 */
static int xmlLittleEndian = 1;
80
81
/**
82
 * xmlEncodingErrMemory:
83
 * @extra:  extra information
84
 *
85
 * Handle an out of memory condition
86
 */
87
static void
88
xmlEncodingErrMemory(const char *extra)
89
0
{
90
0
    __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
91
0
}
92
93
/**
94
 * xmlErrEncoding:
95
 * @error:  the error number
96
 * @msg:  the error message
97
 *
98
 * n encoding error
99
 */
100
static void LIBXML_ATTR_FORMAT(2,0)
101
xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
102
299
{
103
299
    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
104
299
                    XML_FROM_I18N, error, XML_ERR_FATAL,
105
299
                    NULL, 0, val, NULL, NULL, 0, 0, msg, val);
106
299
}
107
108
#ifdef LIBXML_ICU_ENABLED
109
static uconv_t*
110
openIcuConverter(const char* name, int toUnicode)
111
{
112
  UErrorCode status = U_ZERO_ERROR;
113
  uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
114
  if (conv == NULL)
115
    return NULL;
116
117
  conv->pivot_source = conv->pivot_buf;
118
  conv->pivot_target = conv->pivot_buf;
119
120
  conv->uconv = ucnv_open(name, &status);
121
  if (U_FAILURE(status))
122
    goto error;
123
124
  status = U_ZERO_ERROR;
125
  if (toUnicode) {
126
    ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
127
                        NULL, NULL, NULL, &status);
128
  }
129
  else {
130
    ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
131
                        NULL, NULL, NULL, &status);
132
  }
133
  if (U_FAILURE(status))
134
    goto error;
135
136
  status = U_ZERO_ERROR;
137
  conv->utf8 = ucnv_open("UTF-8", &status);
138
  if (U_SUCCESS(status))
139
    return conv;
140
141
error:
142
  if (conv->uconv)
143
    ucnv_close(conv->uconv);
144
  xmlFree(conv);
145
  return NULL;
146
}
147
148
static void
149
closeIcuConverter(uconv_t *conv)
150
{
151
  if (conv != NULL) {
152
    ucnv_close(conv->uconv);
153
    ucnv_close(conv->utf8);
154
    xmlFree(conv);
155
  }
156
}
157
#endif /* LIBXML_ICU_ENABLED */
158
159
/************************************************************************
160
 *                  *
161
 *    Conversions To/From UTF8 encoding     *
162
 *                  *
163
 ************************************************************************/
164
165
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out. Every ASCII byte maps to exactly one UTF-8 byte,
 * so the conversion stops only when the input is exhausted, the output
 * is full, or a byte outside the ASCII range is met.
 *
 * Returns the number of bytes written if success, or -2 if a byte
 *     above 0x7F is found in the input.
 * The value of @inlen after return is the number of octets consumed
 *     (on -2, the octets preceding the offending byte).
 * The value of @outlen after return is the number of octets produced.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    unsigned char* outend = out + (*outlen);
    const unsigned char* instart = in;
    const unsigned char* inend = in + (*inlen);

    /*
     * One output byte per input byte: the only room needed is a single
     * free byte, so no extra safety margin is kept in the output buffer.
     */
    while ((in < inend) && (out < outend)) {
        unsigned int c = *in;

        if (c >= 0x80) {
            /* not ASCII: report what was converted before this byte */
            *outlen = out - outstart;
            *inlen = in - instart;
            return(-2);
        }
        *out++ = c;
        in++;
    }

    *outlen = out - outstart;
    *inlen = in - instart;
    return(*outlen);
}
209
210
#ifdef LIBXML_OUTPUT_ENABLED
211
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out.
 *
 * Only single-byte UTF-8 sequences can be represented in ASCII. Any
 * complete multi-byte sequence is therefore a transcoding failure,
 * whether it is well-formed (a non-ASCII character), malformed, or an
 * overlong encoding of an ASCII character. A multi-byte sequence cut
 * off by the end of the input is left unconsumed so the caller can
 * supply the remaining bytes.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails, or -1 if @out, @outlen or @inlen is NULL. A NULL @in is an
 *     initialization call and returns 0.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        unsigned int d = *in;
        int trailing;

        if (d < 0x80) {
            if (out >= outend)
                break;
            *out++ = d;
            in++;
            continue;
        }

        /* number of continuation bytes announced by the lead byte */
        if      (d < 0xC0)  trailing = 0; /* trailing byte in leading position */
        else if (d < 0xE0)  trailing = 1;
        else if (d < 0xF0)  trailing = 2;
        else if (d < 0xF8)  trailing = 3;
        else                trailing = 0; /* not a valid lead byte */

        /* sequence split at the end of the buffer: wait for more input */
        if (inend - (in + 1) < trailing)
            break;

        /* no chance for this in Ascii */
        *outlen = out - outstart;
        *inlen = in - instart;
        return(-2);
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return(*outlen);
}
294
#endif /* LIBXML_OUTPUT_ENABLED */
295
296
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Convert a block of ISO Latin 1 chars to UTF-8. Bytes below 0x80 are
 * copied as is; bytes from 0x80 up become a two-byte UTF-8 sequence.
 * Conversion stops at the first character that no longer fits in @out.
 *
 * Returns the number of bytes written if success, or -1 if any of the
 *     arguments is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char *dst = out;
    unsigned char *dstend;
    const unsigned char *src = in;
    const unsigned char *srcend;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    dstend = out + *outlen;
    srcend = in + (*inlen);

    while (src < srcend) {
        unsigned char ch = *src;

        if (ch < 0x80) {
            /* plain ASCII: one byte of room is enough */
            if (dst >= dstend)
                break;
            *dst++ = ch;
        } else {
            /* upper half of Latin 1: needs two bytes of room */
            if (dstend - dst < 2)
                break;
            *dst++ = ((ch >> 6) & 0x1F) | 0xC0;
            *dst++ = (ch & 0x3F) | 0x80;
        }
        src++;
    }

    *outlen = dst - out;
    *inlen = src - in;
    return(*outlen);
}
344
345
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * Pass-through "conversion" for UTF-8: copies as many bytes as fit,
 * i.e. the smaller of *@inlenb and *@outlen, without looking at them.
 *
 * Returns the number of bytes written, or -1 if @out, @outlen or @inlenb
 *     is NULL or the number of bytes to copy is negative. A NULL @inb is
 *     an initialization call and returns 0.
 *     The values of *@outlen and *@inlenb after a successful return are
 *     both the number of octets copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int count;

    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);

    if (inb == NULL) {
        /* inb == NULL means output is initialized. */
        *outlen = 0;
        *inlenb = 0;
        return(0);
    }

    count = (*inlenb < *outlen) ? *inlenb : *outlen;
    if (count < 0)
        return(-1);

    /*
     * FIXME: Conversion functions must assure valid UTF-8, so we have
     * to check for UTF-8 validity. Preferably, this converter shouldn't
     * be used at all.
     */
    memcpy(out, inb, count);

    *outlen = count;
    *inlenb = count;
    return(*outlen);
}
391
392
393
#ifdef LIBXML_OUTPUT_ENABLED
394
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Convert a block of UTF-8 chars to ISO Latin 1. Each decoded value up
 * to 0xFF is stored as one byte; anything larger cannot be represented.
 * A sequence cut off by the end of @in, or a character that does not
 * fit in @out any more, ends the conversion without error.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (bad lead byte, bad continuation byte, or a value above
 *     0xFF), or -1 if @out, @outlen or @inlen is NULL. A NULL @in is an
 *     initialization call and returns 0.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char *src;
    const unsigned char *srcend;
    const unsigned char *done;      /* end of the last fully converted char */
    unsigned char *dst;
    const unsigned char *dstend;
    int status = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /* initialization call: nothing to emit */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    src = in;
    done = in;
    srcend = in + (*inlen);
    dst = out;
    dstend = out + (*outlen);

    while (src < srcend) {
        unsigned int lead = *src++;
        unsigned int code;
        int extra;

        if (lead < 0x80) {
            code = lead;
            extra = 0;
        } else if (lead < 0xC0) {
            /* trailing byte in leading position */
            status = -2;
            break;
        } else if (lead < 0xE0) {
            code = lead & 0x1F;
            extra = 1;
        } else if (lead < 0xF0) {
            code = lead & 0x0F;
            extra = 2;
        } else if (lead < 0xF8) {
            code = lead & 0x07;
            extra = 3;
        } else {
            /* no chance for this in IsoLat1 */
            status = -2;
            break;
        }

        /* sequence split at the end of the input */
        if (srcend - src < extra)
            break;

        while (extra-- > 0) {
            unsigned int cont = *src++;

            if ((cont & 0xC0) != 0x80) {
                status = -2;
                break;
            }
            code = (code << 6) | (cont & 0x3F);
        }
        if (status != 0)
            break;

        if (code > 0xFF) {
            /* no chance for this in IsoLat1 */
            status = -2;
            break;
        }
        if (dst >= dstend)
            break;
        *dst++ = code;
        done = src;
    }

    *outlen = dst - out;
    *inlen = done - in;
    return((status != 0) ? status : *outlen);
}
483
#endif /* LIBXML_OUTPUT_ENABLED */
484
485
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is read one byte at a time, so
 * the result does not depend on the byte order of the host and @inb
 * needs no particular alignment. A trailing odd byte is ignored.
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 *     (a high surrogate not followed by a low surrogate, or a low
 *     surrogate without a preceding high surrogate).
 *     The value of *@inlenb after return is the number of octets
 *     consumed; *@outlen is the number of octets produced.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned int c, d;

    if (*outlen == 0) {
        *inlenb = 0;
        return(0);
    }
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + *inlenb;

    /*
     * The output margin guarantees room for the longest (4 byte) UTF-8
     * sequence, so no further space checks are needed inside the loop.
     */
    while ((inend - in >= 2) && (out - outstart + 5 < *outlen)) {
        const unsigned char* next = in + 2;

        c = in[0] | ((unsigned int) in[1] << 8);

        if ((c & 0xF800) == 0xD800) {    /* surrogates */
            if ((c & 0x0400) != 0) {
                /* low surrogate without a leading high surrogate */
                *outlen = out - outstart;
                *inlenb = in - inb;
                return(-2);
            }
            if (inend - next < 2) {      /* handle split multi-byte characters */
                break;
            }
            d = next[0] | ((unsigned int) next[1] << 8);
            next += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = in - inb;
                return(-2);
            }
            c = 0x10000 + ((c & 0x03FF) << 10) + (d & 0x03FF);
        }

        /* c is now a single code point: emit its UTF-8 form */
        if (c < 0x80) {
            *out++ = c;
        } else if (c < 0x800) {
            *out++ = ((c >>  6) & 0x1F) | 0xC0;
            *out++ = (c & 0x3F) | 0x80;
        } else if (c < 0x10000) {
            *out++ = ((c >> 12) & 0x0F) | 0xE0;
            *out++ = ((c >>  6) & 0x3F) | 0x80;
            *out++ = (c & 0x3F) | 0x80;
        } else {
            *out++ = ((c >> 18) & 0x07) | 0xF0;
            *out++ = ((c >> 12) & 0x3F) | 0x80;
            *out++ = ((c >>  6) & 0x3F) | 0x80;
            *out++ = (c & 0x3F) | 0x80;
        }
        in = next;
    }
    *outlen = out - outstart;
    *inlenb = in - inb;
    return(*outlen);
}
577
578
#ifdef LIBXML_OUTPUT_ENABLED
579
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. The output is stored one byte at a time, so the
 * result does not depend on the byte order of the host and @outb needs
 * no particular alignment. No byte order mark is written.
 *
 * A sequence cut off by the end of @in, or a character that does not
 * fit in @outb any more, ends the conversion without error.
 *
 * Returns the number of bytes written, -2 if the transcoding failed
 *     (bad lead byte, bad continuation byte, or a value above 0x10FFFF),
 *     or -1 if @outb, @outlen or @inlen is NULL. A NULL @in is an
 *     initialization call and returns 0.
 *     The value of *@inlen after return is the number of octets
 *     consumed; *@outlen is the number of octets produced.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units can be written */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c = d; trailing = 0; }
        else if (d < 0xC0)  goto fail;  /* trailing byte in leading position */
        else if (d < 0xE0)  { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0)  { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8)  { c = d & 0x07; trailing = 3; }
        else                goto fail;  /* no chance for this in UTF-16 */

        /* sequence split at the end of the input */
        if (inend - in < trailing) {
            break;
        }

        for ( ; trailing; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80)
                goto fail;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* c is now a single code point */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) c;         /* low byte first */
            *out++ = (unsigned char) (c >> 8);
        }
        else if (c < 0x110000) {
            unsigned int hi, lo;

            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) hi;
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) lo;
            *out++ = (unsigned char) (lo >> 8);
        }
        else
            goto fail;                  /* beyond the Unicode range */
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

fail:
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
687
688
/**
689
 * UTF8ToUTF16:
690
 * @outb:  a pointer to an array of bytes to store the result
691
 * @outlen:  the length of @outb
692
 * @in:  a pointer to an array of UTF-8 chars
693
 * @inlen:  the length of @in
694
 *
695
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
696
 * block of chars out.
697
 *
698
 * Returns the number of bytes written, or -1 if lack of space, or -2
699
 *     if the transcoding failed.
700
 */
701
static int
702
UTF8ToUTF16(unsigned char* outb, int *outlen,
703
            const unsigned char* in, int *inlen)
704
0
{
705
0
    if (in == NULL) {
706
  /*
707
   * initialization, add the Byte Order Mark for UTF-16LE
708
   */
709
0
        if (*outlen >= 2) {
710
0
      outb[0] = 0xFF;
711
0
      outb[1] = 0xFE;
712
0
      *outlen = 2;
713
0
      *inlen = 0;
714
#ifdef DEBUG_ENCODING
715
            xmlGenericError(xmlGenericErrorContext,
716
        "Added FFFE Byte Order Mark\n");
717
#endif
718
0
      return(2);
719
0
  }
720
0
  *outlen = 0;
721
0
  *inlen = 0;
722
0
  return(0);
723
0
    }
724
0
    return (UTF8ToUTF16LE(outb, outlen, in, inlen));
725
0
}
726
#endif /* LIBXML_OUTPUT_ENABLED */
727
728
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is read one byte at a time, so
 * the result does not depend on the byte order of the host and @inb
 * needs no particular alignment. A trailing odd byte is ignored.
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 *     (a high surrogate not followed by a low surrogate, or a low
 *     surrogate without a preceding high surrogate).
 * The value of *@inlenb after return is the number of octets consumed;
 *     *@outlen is the number of octets produced.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned int c, d;

    if (*outlen == 0) {
        *inlenb = 0;
        return(0);
    }
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + *inlenb;

    /*
     * The output margin guarantees room for the longest (4 byte) UTF-8
     * sequence, so no further space checks are needed inside the loop.
     */
    while ((inend - in >= 2) && (out - outstart + 5 < *outlen)) {
        const unsigned char* next = in + 2;

        c = ((unsigned int) in[0] << 8) | in[1];

        if ((c & 0xF800) == 0xD800) {    /* surrogates */
            if ((c & 0x0400) != 0) {
                /* low surrogate without a leading high surrogate */
                *outlen = out - outstart;
                *inlenb = in - inb;
                return(-2);
            }
            if (inend - next < 2) {      /* handle split multi-byte characters */
                break;
            }
            d = ((unsigned int) next[0] << 8) | next[1];
            next += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = in - inb;
                return(-2);
            }
            c = 0x10000 + ((c & 0x03FF) << 10) + (d & 0x03FF);
        }

        /* c is now a single code point: emit its UTF-8 form */
        if (c < 0x80) {
            *out++ = c;
        } else if (c < 0x800) {
            *out++ = ((c >>  6) & 0x1F) | 0xC0;
            *out++ = (c & 0x3F) | 0x80;
        } else if (c < 0x10000) {
            *out++ = ((c >> 12) & 0x0F) | 0xE0;
            *out++ = ((c >>  6) & 0x3F) | 0x80;
            *out++ = (c & 0x3F) | 0x80;
        } else {
            *out++ = ((c >> 18) & 0x07) | 0xF0;
            *out++ = ((c >> 12) & 0x3F) | 0x80;
            *out++ = ((c >>  6) & 0x3F) | 0x80;
            *out++ = (c & 0x3F) | 0x80;
        }
        in = next;
    }
    *outlen = out - outstart;
    *inlenb = in - inb;
    return(*outlen);
}
820
821
#ifdef LIBXML_OUTPUT_ENABLED
822
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. The output is stored one byte at a time, so the
 * result does not depend on the byte order of the host and @outb needs
 * no particular alignment. No byte order mark is written.
 *
 * A sequence cut off by the end of @in, or a character that does not
 * fit in @outb any more, ends the conversion without error.
 *
 * Returns the number of bytes written, -2 if the transcoding failed
 *     (bad lead byte, bad continuation byte, or a value above 0x10FFFF),
 *     or -1 if @outb, @outlen or @inlen is NULL. A NULL @in is an
 *     initialization call and returns 0.
 *     The value of *@inlen after return is the number of octets
 *     consumed; *@outlen is the number of octets produced, on every
 *     return path including the -2 ones.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units can be written */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c = d; trailing = 0; }
        else if (d < 0xC0)  goto fail;  /* trailing byte in leading position */
        else if (d < 0xE0)  { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0)  { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8)  { c = d & 0x07; trailing = 3; }
        else                goto fail;  /* no chance for this in UTF-16 */

        /* sequence split at the end of the input */
        if (inend - in < trailing) {
            break;
        }

        for ( ; trailing; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80)
                goto fail;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* c is now a single code point */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c >> 8);  /* high byte first */
            *out++ = (unsigned char) c;
        }
        else if (c < 0x110000) {
            unsigned int hi, lo;

            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) hi;
            *out++ = (unsigned char) (lo >> 8);
            *out++ = (unsigned char) lo;
        }
        else
            goto fail;                  /* beyond the Unicode range */
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

fail:
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
927
#endif /* LIBXML_OUTPUT_ENABLED */
928
929
/************************************************************************
930
 *                  *
931
 *    Generic encoding handling routines      *
932
 *                  *
933
 ************************************************************************/
934
935
/**
936
 * xmlDetectCharEncoding:
937
 * @in:  a pointer to the first bytes of the XML entity, must be at least
938
 *       2 bytes long (at least 4 if encoding is UTF4 variant).
939
 * @len:  pointer to the length of the buffer
940
 *
941
 * Guess the encoding of the entity using the first bytes of the entity content
942
 * according to the non-normative appendix F of the XML-1.0 recommendation.
943
 *
944
 * Returns one of the XML_CHAR_ENCODING_... values.
945
 */
946
xmlCharEncoding
947
xmlDetectCharEncoding(const unsigned char* in, int len)
948
9.88k
{
949
9.88k
    if (in == NULL)
950
0
        return(XML_CHAR_ENCODING_NONE);
951
9.88k
    if (len >= 4) {
952
9.88k
  if ((in[0] == 0x00) && (in[1] == 0x00) &&
953
9.88k
      (in[2] == 0x00) && (in[3] == 0x3C))
954
15
      return(XML_CHAR_ENCODING_UCS4BE);
955
9.86k
  if ((in[0] == 0x3C) && (in[1] == 0x00) &&
956
9.86k
      (in[2] == 0x00) && (in[3] == 0x00))
957
7
      return(XML_CHAR_ENCODING_UCS4LE);
958
9.86k
  if ((in[0] == 0x00) && (in[1] == 0x00) &&
959
9.86k
      (in[2] == 0x3C) && (in[3] == 0x00))
960
1
      return(XML_CHAR_ENCODING_UCS4_2143);
961
9.86k
  if ((in[0] == 0x00) && (in[1] == 0x3C) &&
962
9.86k
      (in[2] == 0x00) && (in[3] == 0x00))
963
0
      return(XML_CHAR_ENCODING_UCS4_3412);
964
9.86k
  if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
965
9.86k
      (in[2] == 0xA7) && (in[3] == 0x94))
966
178
      return(XML_CHAR_ENCODING_EBCDIC);
967
9.68k
  if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
968
9.68k
      (in[2] == 0x78) && (in[3] == 0x6D))
969
2.32k
      return(XML_CHAR_ENCODING_UTF8);
970
  /*
971
   * Although not part of the recommendation, we also
972
   * attempt an "auto-recognition" of UTF-16LE and
973
   * UTF-16BE encodings.
974
   */
975
7.36k
  if ((in[0] == 0x3C) && (in[1] == 0x00) &&
976
7.36k
      (in[2] == 0x3F) && (in[3] == 0x00))
977
189
      return(XML_CHAR_ENCODING_UTF16LE);
978
7.17k
  if ((in[0] == 0x00) && (in[1] == 0x3C) &&
979
7.17k
      (in[2] == 0x00) && (in[3] == 0x3F))
980
46
      return(XML_CHAR_ENCODING_UTF16BE);
981
7.17k
    }
982
7.12k
    if (len >= 3) {
983
  /*
984
   * Errata on XML-1.0 June 20 2001
985
   * We now allow an UTF8 encoded BOM
986
   */
987
7.12k
  if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
988
7.12k
      (in[2] == 0xBF))
989
577
      return(XML_CHAR_ENCODING_UTF8);
990
7.12k
    }
991
    /* For UTF-16 we can recognize by the BOM */
992
6.55k
    if (len >= 2) {
993
6.55k
  if ((in[0] == 0xFE) && (in[1] == 0xFF))
994
228
      return(XML_CHAR_ENCODING_UTF16BE);
995
6.32k
  if ((in[0] == 0xFF) && (in[1] == 0xFE))
996
53
      return(XML_CHAR_ENCODING_UTF16LE);
997
6.32k
    }
998
6.26k
    return(XML_CHAR_ENCODING_NONE);
999
6.55k
}
1000
1001
/**
1002
 * xmlCleanupEncodingAliases:
1003
 *
1004
 * Unregisters all aliases
1005
 */
1006
void
1007
0
xmlCleanupEncodingAliases(void) {
1008
0
    int i;
1009
1010
0
    if (xmlCharEncodingAliases == NULL)
1011
0
  return;
1012
1013
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1014
0
  if (xmlCharEncodingAliases[i].name != NULL)
1015
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1016
0
  if (xmlCharEncodingAliases[i].alias != NULL)
1017
0
      xmlFree((char *) xmlCharEncodingAliases[i].alias);
1018
0
    }
1019
0
    xmlCharEncodingAliasesNb = 0;
1020
0
    xmlCharEncodingAliasesMax = 0;
1021
0
    xmlFree(xmlCharEncodingAliases);
1022
0
    xmlCharEncodingAliases = NULL;
1023
0
}
1024
1025
/**
1026
 * xmlGetEncodingAlias:
1027
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1028
 *
1029
 * Lookup an encoding name for the given alias.
1030
 *
1031
 * Returns NULL if not found, otherwise the original name
1032
 */
1033
const char *
1034
2.38k
xmlGetEncodingAlias(const char *alias) {
1035
2.38k
    int i;
1036
2.38k
    char upper[100];
1037
1038
2.38k
    if (alias == NULL)
1039
0
  return(NULL);
1040
1041
2.38k
    if (xmlCharEncodingAliases == NULL)
1042
2.38k
  return(NULL);
1043
1044
0
    for (i = 0;i < 99;i++) {
1045
0
        upper[i] = (char) toupper((unsigned char) alias[i]);
1046
0
  if (upper[i] == 0) break;
1047
0
    }
1048
0
    upper[i] = 0;
1049
1050
    /*
1051
     * Walk down the list looking for a definition of the alias
1052
     */
1053
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1054
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1055
0
      return(xmlCharEncodingAliases[i].name);
1056
0
  }
1057
0
    }
1058
0
    return(NULL);
1059
0
}
1060
1061
/**
1062
 * xmlAddEncodingAlias:
1063
 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1064
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1065
 *
1066
 * Registers an alias @alias for an encoding named @name. Existing alias
1067
 * will be overwritten.
1068
 *
1069
 * Returns 0 in case of success, -1 in case of error
1070
 */
1071
int
1072
0
xmlAddEncodingAlias(const char *name, const char *alias) {
1073
0
    int i;
1074
0
    char upper[100];
1075
1076
0
    if ((name == NULL) || (alias == NULL))
1077
0
  return(-1);
1078
1079
0
    for (i = 0;i < 99;i++) {
1080
0
        upper[i] = (char) toupper((unsigned char) alias[i]);
1081
0
  if (upper[i] == 0) break;
1082
0
    }
1083
0
    upper[i] = 0;
1084
1085
0
    if (xmlCharEncodingAliases == NULL) {
1086
0
  xmlCharEncodingAliasesNb = 0;
1087
0
  xmlCharEncodingAliasesMax = 20;
1088
0
  xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1089
0
        xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1090
0
  if (xmlCharEncodingAliases == NULL)
1091
0
      return(-1);
1092
0
    } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1093
0
  xmlCharEncodingAliasesMax *= 2;
1094
0
  xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1095
0
        xmlRealloc(xmlCharEncodingAliases,
1096
0
             xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1097
0
    }
1098
    /*
1099
     * Walk down the list looking for a definition of the alias
1100
     */
1101
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1102
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1103
      /*
1104
       * Replace the definition.
1105
       */
1106
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1107
0
      xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1108
0
      return(0);
1109
0
  }
1110
0
    }
1111
    /*
1112
     * Add the definition
1113
     */
1114
0
    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1115
0
    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1116
0
    xmlCharEncodingAliasesNb++;
1117
0
    return(0);
1118
0
}
1119
1120
/**
1121
 * xmlDelEncodingAlias:
1122
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1123
 *
1124
 * Unregisters an encoding alias @alias
1125
 *
1126
 * Returns 0 in case of success, -1 in case of error
1127
 */
1128
int
1129
0
xmlDelEncodingAlias(const char *alias) {
1130
0
    int i;
1131
1132
0
    if (alias == NULL)
1133
0
  return(-1);
1134
1135
0
    if (xmlCharEncodingAliases == NULL)
1136
0
  return(-1);
1137
    /*
1138
     * Walk down the list looking for a definition of the alias
1139
     */
1140
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1141
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1142
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1143
0
      xmlFree((char *) xmlCharEncodingAliases[i].alias);
1144
0
      xmlCharEncodingAliasesNb--;
1145
0
      memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1146
0
        sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1147
0
      return(0);
1148
0
  }
1149
0
    }
1150
0
    return(-1);
1151
0
}
1152
1153
/**
1154
 * xmlParseCharEncoding:
1155
 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1156
 *
1157
 * Compare the string to the encoding schemes already known. Note
1158
 * that the comparison is case insensitive accordingly to the section
1159
 * [XML] 4.3.3 Character Encoding in Entities.
1160
 *
1161
 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1162
 * if not recognized.
1163
 */
1164
xmlCharEncoding
1165
xmlParseCharEncoding(const char* name)
1166
667
{
1167
667
    const char *alias;
1168
667
    char upper[500];
1169
667
    int i;
1170
1171
667
    if (name == NULL)
1172
0
  return(XML_CHAR_ENCODING_NONE);
1173
1174
    /*
1175
     * Do the alias resolution
1176
     */
1177
667
    alias = xmlGetEncodingAlias(name);
1178
667
    if (alias != NULL)
1179
0
  name = alias;
1180
1181
7.36k
    for (i = 0;i < 499;i++) {
1182
7.36k
        upper[i] = (char) toupper((unsigned char) name[i]);
1183
7.36k
  if (upper[i] == 0) break;
1184
7.36k
    }
1185
667
    upper[i] = 0;
1186
1187
667
    if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1188
667
    if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1189
667
    if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1190
1191
    /*
1192
     * NOTE: if we were able to parse this, the endianness of UTF16 is
1193
     *       already found and in use
1194
     */
1195
667
    if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1196
667
    if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1197
1198
667
    if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1199
662
    if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1200
662
    if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1201
1202
    /*
1203
     * NOTE: if we were able to parse this, the endianness of UCS4 is
1204
     *       already found and in use
1205
     */
1206
662
    if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1207
640
    if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1208
640
    if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1209
1210
1211
640
    if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1212
640
    if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1213
636
    if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1214
1215
636
    if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1216
636
    if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1217
635
    if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1218
1219
635
    if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1220
635
    if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1221
635
    if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1222
635
    if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1223
635
    if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1224
635
    if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1225
635
    if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1226
1227
635
    if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1228
635
    if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1229
635
    if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1230
1231
#ifdef DEBUG_ENCODING
1232
    xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1233
#endif
1234
635
    return(XML_CHAR_ENCODING_ERROR);
1235
635
}
1236
1237
/**
1238
 * xmlGetCharEncodingName:
1239
 * @enc:  the encoding
1240
 *
1241
 * The "canonical" name for XML encoding.
1242
 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1243
 * Section 4.3.3  Character Encoding in Entities
1244
 *
1245
 * Returns the canonical name for the given encoding
1246
 */
1247
1248
const char*
1249
41
xmlGetCharEncodingName(xmlCharEncoding enc) {
1250
41
    switch (enc) {
1251
0
        case XML_CHAR_ENCODING_ERROR:
1252
0
      return(NULL);
1253
0
        case XML_CHAR_ENCODING_NONE:
1254
0
      return(NULL);
1255
0
        case XML_CHAR_ENCODING_UTF8:
1256
0
      return("UTF-8");
1257
0
        case XML_CHAR_ENCODING_UTF16LE:
1258
0
      return("UTF-16");
1259
0
        case XML_CHAR_ENCODING_UTF16BE:
1260
0
      return("UTF-16");
1261
8
        case XML_CHAR_ENCODING_EBCDIC:
1262
8
            return("EBCDIC");
1263
22
        case XML_CHAR_ENCODING_UCS4LE:
1264
22
            return("ISO-10646-UCS-4");
1265
0
        case XML_CHAR_ENCODING_UCS4BE:
1266
0
            return("ISO-10646-UCS-4");
1267
1
        case XML_CHAR_ENCODING_UCS4_2143:
1268
1
            return("ISO-10646-UCS-4");
1269
0
        case XML_CHAR_ENCODING_UCS4_3412:
1270
0
            return("ISO-10646-UCS-4");
1271
5
        case XML_CHAR_ENCODING_UCS2:
1272
5
            return("ISO-10646-UCS-2");
1273
4
        case XML_CHAR_ENCODING_8859_1:
1274
4
      return("ISO-8859-1");
1275
1
        case XML_CHAR_ENCODING_8859_2:
1276
1
      return("ISO-8859-2");
1277
0
        case XML_CHAR_ENCODING_8859_3:
1278
0
      return("ISO-8859-3");
1279
0
        case XML_CHAR_ENCODING_8859_4:
1280
0
      return("ISO-8859-4");
1281
0
        case XML_CHAR_ENCODING_8859_5:
1282
0
      return("ISO-8859-5");
1283
0
        case XML_CHAR_ENCODING_8859_6:
1284
0
      return("ISO-8859-6");
1285
0
        case XML_CHAR_ENCODING_8859_7:
1286
0
      return("ISO-8859-7");
1287
0
        case XML_CHAR_ENCODING_8859_8:
1288
0
      return("ISO-8859-8");
1289
0
        case XML_CHAR_ENCODING_8859_9:
1290
0
      return("ISO-8859-9");
1291
0
        case XML_CHAR_ENCODING_2022_JP:
1292
0
            return("ISO-2022-JP");
1293
0
        case XML_CHAR_ENCODING_SHIFT_JIS:
1294
0
            return("Shift-JIS");
1295
0
        case XML_CHAR_ENCODING_EUC_JP:
1296
0
            return("EUC-JP");
1297
0
  case XML_CHAR_ENCODING_ASCII:
1298
0
      return(NULL);
1299
41
    }
1300
0
    return(NULL);
1301
41
}
1302
1303
/************************************************************************
1304
 *                  *
1305
 *      Char encoding handlers        *
1306
 *                  *
1307
 ************************************************************************/
1308
1309
#if defined(LIBXML_ISO8859X_ENABLED) && \
    !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)

/*
 * Forward-declares the pair of static converters for ISO-8859-<num>:
 * ISO8859_<num>ToUTF8 and UTF8ToISO8859_<num>. They are referenced by
 * the defaultHandlers table and only exist when neither iconv nor ICU
 * is enabled.
 */
#define DECLARE_ISO_FUNCS(num) \
    static int ISO8859_##num##ToUTF8(unsigned char* out, int *outlen, \
                                     const unsigned char* in, int *inlen); \
    static int UTF8ToISO8859_##num(unsigned char* out, int *outlen, \
                                   const unsigned char* in, int *inlen);

/** DOC_DISABLE */
DECLARE_ISO_FUNCS(2)
DECLARE_ISO_FUNCS(3)
DECLARE_ISO_FUNCS(4)
DECLARE_ISO_FUNCS(5)
DECLARE_ISO_FUNCS(6)
DECLARE_ISO_FUNCS(7)
DECLARE_ISO_FUNCS(8)
DECLARE_ISO_FUNCS(9)
DECLARE_ISO_FUNCS(10)
DECLARE_ISO_FUNCS(11)
DECLARE_ISO_FUNCS(13)
DECLARE_ISO_FUNCS(14)
DECLARE_ISO_FUNCS(15)
DECLARE_ISO_FUNCS(16)
/** DOC_ENABLE */

#endif /* LIBXML_ISO8859X_ENABLED */
1336
1337
/*
 * Trailing initializers for the optional iconv and ICU members of a
 * handler; each expands to nothing when the matching backend is not
 * compiled in.
 */
#if defined(LIBXML_ICONV_ENABLED)
  #define EMPTY_ICONV , (iconv_t) 0, (iconv_t) 0
#else
  #define EMPTY_ICONV
#endif

#if defined(LIBXML_ICU_ENABLED)
  #define EMPTY_UCONV , NULL, NULL
#else
  #define EMPTY_UCONV
#endif

/*
 * Static initializer for one built-in handler: name, input converter,
 * output converter, followed by the empty backend members.
 */
#define MAKE_HANDLER(encName, inFunc, outFunc) \
    { (char *) encName, inFunc, outFunc EMPTY_ICONV EMPTY_UCONV }
1351
1352
static const xmlCharEncodingHandler defaultHandlers[] = {
1353
    MAKE_HANDLER("UTF-8", UTF8ToUTF8, UTF8ToUTF8)
1354
#ifdef LIBXML_OUTPUT_ENABLED
1355
    ,MAKE_HANDLER("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE)
1356
    ,MAKE_HANDLER("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE)
1357
    ,MAKE_HANDLER("UTF-16", UTF16LEToUTF8, UTF8ToUTF16)
1358
    ,MAKE_HANDLER("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1)
1359
    ,MAKE_HANDLER("ASCII", asciiToUTF8, UTF8Toascii)
1360
    ,MAKE_HANDLER("US-ASCII", asciiToUTF8, UTF8Toascii)
1361
#ifdef LIBXML_HTML_ENABLED
1362
    ,MAKE_HANDLER("HTML", NULL, UTF8ToHtml)
1363
#endif
1364
#else
1365
    ,MAKE_HANDLER("UTF-16LE", UTF16LEToUTF8, NULL)
1366
    ,MAKE_HANDLER("UTF-16BE", UTF16BEToUTF8, NULL)
1367
    ,MAKE_HANDLER("UTF-16", UTF16LEToUTF8, NULL)
1368
    ,MAKE_HANDLER("ISO-8859-1", isolat1ToUTF8, NULL)
1369
    ,MAKE_HANDLER("ASCII", asciiToUTF8, NULL)
1370
    ,MAKE_HANDLER("US-ASCII", asciiToUTF8, NULL)
1371
#endif /* LIBXML_OUTPUT_ENABLED */
1372
1373
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) && \
1374
    defined(LIBXML_ISO8859X_ENABLED)
1375
    ,MAKE_HANDLER("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2)
1376
    ,MAKE_HANDLER("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3)
1377
    ,MAKE_HANDLER("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4)
1378
    ,MAKE_HANDLER("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5)
1379
    ,MAKE_HANDLER("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6)
1380
    ,MAKE_HANDLER("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7)
1381
    ,MAKE_HANDLER("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8)
1382
    ,MAKE_HANDLER("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9)
1383
    ,MAKE_HANDLER("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10)
1384
    ,MAKE_HANDLER("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11)
1385
    ,MAKE_HANDLER("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13)
1386
    ,MAKE_HANDLER("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14)
1387
    ,MAKE_HANDLER("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15)
1388
    ,MAKE_HANDLER("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16)
1389
#endif
1390
};
1391
1392
#define NUM_DEFAULT_HANDLERS \
1393
25.4k
    (sizeof(defaultHandlers) / sizeof(defaultHandlers[0]))
1394
1395
static const xmlCharEncodingHandler *xmlUTF16LEHandler = &defaultHandlers[1];
1396
static const xmlCharEncodingHandler *xmlUTF16BEHandler = &defaultHandlers[2];
1397
1398
/* the size should be growable, but it's not a big deal ... */
1399
0
#define MAX_ENCODING_HANDLERS 50
1400
static xmlCharEncodingHandlerPtr *handlers = NULL;
1401
static int nbCharEncodingHandler = 0;
1402
1403
/**
1404
 * xmlNewCharEncodingHandler:
1405
 * @name:  the encoding name, in UTF-8 format (ASCII actually)
1406
 * @input:  the xmlCharEncodingInputFunc to read that encoding
1407
 * @output:  the xmlCharEncodingOutputFunc to write that encoding
1408
 *
1409
 * Create and registers an xmlCharEncodingHandler.
1410
 *
1411
 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1412
 */
1413
xmlCharEncodingHandlerPtr
1414
xmlNewCharEncodingHandler(const char *name,
1415
                          xmlCharEncodingInputFunc input,
1416
0
                          xmlCharEncodingOutputFunc output) {
1417
0
    xmlCharEncodingHandlerPtr handler;
1418
0
    const char *alias;
1419
0
    char upper[500];
1420
0
    int i;
1421
0
    char *up = NULL;
1422
1423
    /*
1424
     * Do the alias resolution
1425
     */
1426
0
    alias = xmlGetEncodingAlias(name);
1427
0
    if (alias != NULL)
1428
0
  name = alias;
1429
1430
    /*
1431
     * Keep only the uppercase version of the encoding.
1432
     */
1433
0
    if (name == NULL) {
1434
0
        xmlEncodingErr(XML_I18N_NO_NAME,
1435
0
           "xmlNewCharEncodingHandler : no name !\n", NULL);
1436
0
  return(NULL);
1437
0
    }
1438
0
    for (i = 0;i < 499;i++) {
1439
0
        upper[i] = (char) toupper((unsigned char) name[i]);
1440
0
  if (upper[i] == 0) break;
1441
0
    }
1442
0
    upper[i] = 0;
1443
0
    up = xmlMemStrdup(upper);
1444
0
    if (up == NULL) {
1445
0
        xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1446
0
  return(NULL);
1447
0
    }
1448
1449
    /*
1450
     * allocate and fill-up an handler block.
1451
     */
1452
0
    handler = (xmlCharEncodingHandlerPtr)
1453
0
              xmlMalloc(sizeof(xmlCharEncodingHandler));
1454
0
    if (handler == NULL) {
1455
0
        xmlFree(up);
1456
0
        xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1457
0
  return(NULL);
1458
0
    }
1459
0
    memset(handler, 0, sizeof(xmlCharEncodingHandler));
1460
0
    handler->input = input;
1461
0
    handler->output = output;
1462
0
    handler->name = up;
1463
1464
0
#ifdef LIBXML_ICONV_ENABLED
1465
0
    handler->iconv_in = NULL;
1466
0
    handler->iconv_out = NULL;
1467
0
#endif
1468
#ifdef LIBXML_ICU_ENABLED
1469
    handler->uconv_in = NULL;
1470
    handler->uconv_out = NULL;
1471
#endif
1472
1473
    /*
1474
     * registers and returns the handler.
1475
     */
1476
0
    xmlRegisterCharEncodingHandler(handler);
1477
#ifdef DEBUG_ENCODING
1478
    xmlGenericError(xmlGenericErrorContext,
1479
      "Registered encoding handler for %s\n", name);
1480
#endif
1481
0
    return(handler);
1482
0
}
1483
1484
/**
 * xmlInitCharEncodingHandlers:
 *
 * DEPRECATED: Alias for xmlInitParser.
 *
 * Kept as a thin wrapper: all it does is forward to xmlInitParser().
 */
void
xmlInitCharEncodingHandlers(void)
{
    xmlInitParser();
}
1493
1494
/**
1495
 * xmlInitEncodingInternal:
1496
 *
1497
 * Initialize the char encoding support.
1498
 */
1499
void
1500
1
xmlInitEncodingInternal(void) {
1501
1
    unsigned short int tst = 0x1234;
1502
1
    unsigned char *ptr = (unsigned char *) &tst;
1503
1504
1
    if (*ptr == 0x12) xmlLittleEndian = 0;
1505
1
    else if (*ptr == 0x34) xmlLittleEndian = 1;
1506
0
    else {
1507
0
        xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1508
0
                 "Odd problem at endianness detection\n", NULL);
1509
0
    }
1510
1
}
1511
1512
/**
1513
 * xmlCleanupCharEncodingHandlers:
1514
 *
1515
 * DEPRECATED: This function will be made private. Call xmlCleanupParser
1516
 * to free global state but see the warnings there. xmlCleanupParser
1517
 * should be only called once at program exit. In most cases, you don't
1518
 * have call cleanup functions at all.
1519
 *
1520
 * Cleanup the memory allocated for the char encoding support, it
1521
 * unregisters all the encoding handlers and the aliases.
1522
 */
1523
void
1524
0
xmlCleanupCharEncodingHandlers(void) {
1525
0
    xmlCleanupEncodingAliases();
1526
1527
0
    if (handlers == NULL) return;
1528
1529
0
    for (;nbCharEncodingHandler > 0;) {
1530
0
        nbCharEncodingHandler--;
1531
0
  if (handlers[nbCharEncodingHandler] != NULL) {
1532
0
      if (handlers[nbCharEncodingHandler]->name != NULL)
1533
0
    xmlFree(handlers[nbCharEncodingHandler]->name);
1534
0
      xmlFree(handlers[nbCharEncodingHandler]);
1535
0
  }
1536
0
    }
1537
0
    xmlFree(handlers);
1538
0
    handlers = NULL;
1539
0
    nbCharEncodingHandler = 0;
1540
0
}
1541
1542
/**
1543
 * xmlRegisterCharEncodingHandler:
1544
 * @handler:  the xmlCharEncodingHandlerPtr handler block
1545
 *
1546
 * Register the char encoding handler, surprising, isn't it ?
1547
 */
1548
void
1549
0
xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1550
0
    if (handler == NULL) {
1551
0
        xmlEncodingErr(XML_I18N_NO_HANDLER,
1552
0
    "xmlRegisterCharEncodingHandler: NULL handler\n", NULL);
1553
0
        return;
1554
0
    }
1555
0
    if (handlers == NULL) {
1556
0
        handlers = xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(handlers[0]));
1557
0
        if (handlers == NULL) {
1558
0
            xmlEncodingErrMemory("allocating handler table");
1559
0
            goto free_handler;
1560
0
        }
1561
0
    }
1562
1563
0
    if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1564
0
        xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1565
0
  "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1566
0
                 "MAX_ENCODING_HANDLERS");
1567
0
        goto free_handler;
1568
0
    }
1569
0
    handlers[nbCharEncodingHandler++] = handler;
1570
0
    return;
1571
1572
0
free_handler:
1573
0
    if (handler != NULL) {
1574
0
        if (handler->name != NULL) {
1575
0
            xmlFree(handler->name);
1576
0
        }
1577
0
        xmlFree(handler);
1578
0
    }
1579
0
}
1580
1581
/**
1582
 * xmlGetCharEncodingHandler:
1583
 * @enc:  an xmlCharEncoding value.
1584
 *
1585
 * Search in the registered set the handler able to read/write that encoding.
1586
 *
1587
 * Returns the handler or NULL if not found
1588
 */
1589
xmlCharEncodingHandlerPtr
1590
10.6k
xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1591
10.6k
    xmlCharEncodingHandlerPtr handler;
1592
1593
10.6k
    switch (enc) {
1594
0
        case XML_CHAR_ENCODING_ERROR:
1595
0
      return(NULL);
1596
9.88k
        case XML_CHAR_ENCODING_NONE:
1597
9.88k
      return(NULL);
1598
0
        case XML_CHAR_ENCODING_UTF8:
1599
0
      return(NULL);
1600
242
        case XML_CHAR_ENCODING_UTF16LE:
1601
242
      return((xmlCharEncodingHandlerPtr) xmlUTF16LEHandler);
1602
274
        case XML_CHAR_ENCODING_UTF16BE:
1603
274
      return((xmlCharEncodingHandlerPtr) xmlUTF16BEHandler);
1604
178
        case XML_CHAR_ENCODING_EBCDIC:
1605
178
            handler = xmlFindCharEncodingHandler("EBCDIC");
1606
178
            if (handler != NULL) return(handler);
1607
178
            handler = xmlFindCharEncodingHandler("ebcdic");
1608
178
            if (handler != NULL) return(handler);
1609
178
            handler = xmlFindCharEncodingHandler("EBCDIC-US");
1610
178
            if (handler != NULL) return(handler);
1611
0
            handler = xmlFindCharEncodingHandler("IBM-037");
1612
0
            if (handler != NULL) return(handler);
1613
0
      break;
1614
15
        case XML_CHAR_ENCODING_UCS4BE:
1615
15
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1616
15
            if (handler != NULL) return(handler);
1617
15
            handler = xmlFindCharEncodingHandler("UCS-4");
1618
15
            if (handler != NULL) return(handler);
1619
0
            handler = xmlFindCharEncodingHandler("UCS4");
1620
0
            if (handler != NULL) return(handler);
1621
0
      break;
1622
7
        case XML_CHAR_ENCODING_UCS4LE:
1623
7
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1624
7
            if (handler != NULL) return(handler);
1625
7
            handler = xmlFindCharEncodingHandler("UCS-4");
1626
7
            if (handler != NULL) return(handler);
1627
0
            handler = xmlFindCharEncodingHandler("UCS4");
1628
0
            if (handler != NULL) return(handler);
1629
0
      break;
1630
1
        case XML_CHAR_ENCODING_UCS4_2143:
1631
1
      break;
1632
0
        case XML_CHAR_ENCODING_UCS4_3412:
1633
0
      break;
1634
0
        case XML_CHAR_ENCODING_UCS2:
1635
0
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1636
0
            if (handler != NULL) return(handler);
1637
0
            handler = xmlFindCharEncodingHandler("UCS-2");
1638
0
            if (handler != NULL) return(handler);
1639
0
            handler = xmlFindCharEncodingHandler("UCS2");
1640
0
            if (handler != NULL) return(handler);
1641
0
      break;
1642
1643
      /*
1644
       * We used to keep ISO Latin encodings native in the
1645
       * generated data. This led to so many problems that
1646
       * this has been removed. One can still change this
1647
       * back by registering no-ops encoders for those
1648
       */
1649
0
        case XML_CHAR_ENCODING_8859_1:
1650
0
      handler = xmlFindCharEncodingHandler("ISO-8859-1");
1651
0
      if (handler != NULL) return(handler);
1652
0
      break;
1653
0
        case XML_CHAR_ENCODING_8859_2:
1654
0
      handler = xmlFindCharEncodingHandler("ISO-8859-2");
1655
0
      if (handler != NULL) return(handler);
1656
0
      break;
1657
0
        case XML_CHAR_ENCODING_8859_3:
1658
0
      handler = xmlFindCharEncodingHandler("ISO-8859-3");
1659
0
      if (handler != NULL) return(handler);
1660
0
      break;
1661
0
        case XML_CHAR_ENCODING_8859_4:
1662
0
      handler = xmlFindCharEncodingHandler("ISO-8859-4");
1663
0
      if (handler != NULL) return(handler);
1664
0
      break;
1665
0
        case XML_CHAR_ENCODING_8859_5:
1666
0
      handler = xmlFindCharEncodingHandler("ISO-8859-5");
1667
0
      if (handler != NULL) return(handler);
1668
0
      break;
1669
0
        case XML_CHAR_ENCODING_8859_6:
1670
0
      handler = xmlFindCharEncodingHandler("ISO-8859-6");
1671
0
      if (handler != NULL) return(handler);
1672
0
      break;
1673
0
        case XML_CHAR_ENCODING_8859_7:
1674
0
      handler = xmlFindCharEncodingHandler("ISO-8859-7");
1675
0
      if (handler != NULL) return(handler);
1676
0
      break;
1677
0
        case XML_CHAR_ENCODING_8859_8:
1678
0
      handler = xmlFindCharEncodingHandler("ISO-8859-8");
1679
0
      if (handler != NULL) return(handler);
1680
0
      break;
1681
0
        case XML_CHAR_ENCODING_8859_9:
1682
0
      handler = xmlFindCharEncodingHandler("ISO-8859-9");
1683
0
      if (handler != NULL) return(handler);
1684
0
      break;
1685
1686
1687
0
        case XML_CHAR_ENCODING_2022_JP:
1688
0
            handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1689
0
            if (handler != NULL) return(handler);
1690
0
      break;
1691
0
        case XML_CHAR_ENCODING_SHIFT_JIS:
1692
0
            handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1693
0
            if (handler != NULL) return(handler);
1694
0
            handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1695
0
            if (handler != NULL) return(handler);
1696
0
            handler = xmlFindCharEncodingHandler("Shift_JIS");
1697
0
            if (handler != NULL) return(handler);
1698
0
      break;
1699
0
        case XML_CHAR_ENCODING_EUC_JP:
1700
0
            handler = xmlFindCharEncodingHandler("EUC-JP");
1701
0
            if (handler != NULL) return(handler);
1702
0
      break;
1703
0
  default:
1704
0
      break;
1705
10.6k
    }
1706
1707
#ifdef DEBUG_ENCODING
1708
    xmlGenericError(xmlGenericErrorContext,
1709
      "No handler found for encoding %d\n", enc);
1710
#endif
1711
1
    return(NULL);
1712
10.6k
}
1713
1714
/**
1715
 * xmlFindCharEncodingHandler:
1716
 * @name:  a string describing the char encoding.
1717
 *
1718
 * Search in the registered set the handler able to read/write that encoding
1719
 * or create a new one.
1720
 *
1721
 * Returns the handler or NULL if not found
1722
 */
1723
xmlCharEncodingHandlerPtr
1724
1.71k
xmlFindCharEncodingHandler(const char *name) {
1725
1.71k
    const char *nalias;
1726
1.71k
    const char *norig;
1727
1.71k
    xmlCharEncoding alias;
1728
1.71k
#ifdef LIBXML_ICONV_ENABLED
1729
1.71k
    xmlCharEncodingHandlerPtr enc;
1730
1.71k
    iconv_t icv_in, icv_out;
1731
1.71k
#endif /* LIBXML_ICONV_ENABLED */
1732
#ifdef LIBXML_ICU_ENABLED
1733
    xmlCharEncodingHandlerPtr encu;
1734
    uconv_t *ucv_in, *ucv_out;
1735
#endif /* LIBXML_ICU_ENABLED */
1736
1.71k
    char upper[100];
1737
1.71k
    int i;
1738
1739
1.71k
    if (name == NULL) return(NULL);
1740
1.71k
    if (name[0] == 0) return(NULL);
1741
1742
    /*
1743
     * Do the alias resolution
1744
     */
1745
1.71k
    norig = name;
1746
1.71k
    nalias = xmlGetEncodingAlias(name);
1747
1.71k
    if (nalias != NULL)
1748
0
  name = nalias;
1749
1750
    /*
1751
     * Check first for directly registered encoding names
1752
     */
1753
14.0k
    for (i = 0;i < 99;i++) {
1754
14.0k
        upper[i] = (char) toupper((unsigned char) name[i]);
1755
14.0k
  if (upper[i] == 0) break;
1756
14.0k
    }
1757
1.71k
    upper[i] = 0;
1758
1759
15.0k
    for (i = 0; i < (int) NUM_DEFAULT_HANDLERS; i++) {
1760
13.4k
        if (strcmp(upper, defaultHandlers[i].name) == 0)
1761
79
            return((xmlCharEncodingHandlerPtr) &defaultHandlers[i]);
1762
13.4k
    }
1763
1764
1.63k
    if (handlers != NULL) {
1765
0
        for (i = 0;i < nbCharEncodingHandler; i++) {
1766
0
            if (!strcmp(upper, handlers[i]->name)) {
1767
#ifdef DEBUG_ENCODING
1768
                xmlGenericError(xmlGenericErrorContext,
1769
                        "Found registered handler for encoding %s\n", name);
1770
#endif
1771
0
                return(handlers[i]);
1772
0
            }
1773
0
        }
1774
0
    }
1775
1776
1.63k
#ifdef LIBXML_ICONV_ENABLED
1777
    /* check whether iconv can handle this */
1778
1.63k
    icv_in = iconv_open("UTF-8", name);
1779
1.63k
    icv_out = iconv_open(name, "UTF-8");
1780
1.63k
    if (icv_in == (iconv_t) -1) {
1781
667
        icv_in = iconv_open("UTF-8", upper);
1782
667
    }
1783
1.63k
    if (icv_out == (iconv_t) -1) {
1784
667
  icv_out = iconv_open(upper, "UTF-8");
1785
667
    }
1786
1.63k
    if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1787
967
      enc = (xmlCharEncodingHandlerPtr)
1788
967
            xmlMalloc(sizeof(xmlCharEncodingHandler));
1789
967
      if (enc == NULL) {
1790
0
          iconv_close(icv_in);
1791
0
          iconv_close(icv_out);
1792
0
    return(NULL);
1793
0
      }
1794
967
            memset(enc, 0, sizeof(xmlCharEncodingHandler));
1795
967
      enc->name = xmlMemStrdup(name);
1796
967
            if (enc->name == NULL) {
1797
0
                xmlFree(enc);
1798
0
                iconv_close(icv_in);
1799
0
                iconv_close(icv_out);
1800
0
                return(NULL);
1801
0
            }
1802
967
      enc->input = NULL;
1803
967
      enc->output = NULL;
1804
967
      enc->iconv_in = icv_in;
1805
967
      enc->iconv_out = icv_out;
1806
#ifdef DEBUG_ENCODING
1807
            xmlGenericError(xmlGenericErrorContext,
1808
        "Found iconv handler for encoding %s\n", name);
1809
#endif
1810
967
      return enc;
1811
967
    } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1812
0
      xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1813
0
        "iconv : problems with filters for '%s'\n", name);
1814
0
      if (icv_in != (iconv_t) -1)
1815
0
    iconv_close(icv_in);
1816
0
      else
1817
0
    iconv_close(icv_out);
1818
0
    }
1819
667
#endif /* LIBXML_ICONV_ENABLED */
1820
#ifdef LIBXML_ICU_ENABLED
1821
    /* check whether icu can handle this */
1822
    ucv_in = openIcuConverter(name, 1);
1823
    ucv_out = openIcuConverter(name, 0);
1824
    if (ucv_in != NULL && ucv_out != NULL) {
1825
      encu = (xmlCharEncodingHandlerPtr)
1826
             xmlMalloc(sizeof(xmlCharEncodingHandler));
1827
      if (encu == NULL) {
1828
                closeIcuConverter(ucv_in);
1829
                closeIcuConverter(ucv_out);
1830
    return(NULL);
1831
      }
1832
            memset(encu, 0, sizeof(xmlCharEncodingHandler));
1833
      encu->name = xmlMemStrdup(name);
1834
            if (encu->name == NULL) {
1835
                xmlFree(encu);
1836
                closeIcuConverter(ucv_in);
1837
                closeIcuConverter(ucv_out);
1838
                return(NULL);
1839
            }
1840
      encu->input = NULL;
1841
      encu->output = NULL;
1842
      encu->uconv_in = ucv_in;
1843
      encu->uconv_out = ucv_out;
1844
#ifdef DEBUG_ENCODING
1845
            xmlGenericError(xmlGenericErrorContext,
1846
        "Found ICU converter handler for encoding %s\n", name);
1847
#endif
1848
      return encu;
1849
    } else if (ucv_in != NULL || ucv_out != NULL) {
1850
            closeIcuConverter(ucv_in);
1851
            closeIcuConverter(ucv_out);
1852
      xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1853
        "ICU converter : problems with filters for '%s'\n", name);
1854
    }
1855
#endif /* LIBXML_ICU_ENABLED */
1856
1857
#ifdef DEBUG_ENCODING
1858
    xmlGenericError(xmlGenericErrorContext,
1859
      "No handler found for encoding %s\n", name);
1860
#endif
1861
1862
    /*
1863
     * Fallback using the canonical names
1864
     */
1865
667
    alias = xmlParseCharEncoding(norig);
1866
667
    if (alias != XML_CHAR_ENCODING_ERROR) {
1867
32
        const char* canon;
1868
32
        canon = xmlGetCharEncodingName(alias);
1869
32
        if ((canon != NULL) && (strcmp(name, canon))) {
1870
7
      return(xmlFindCharEncodingHandler(canon));
1871
7
        }
1872
32
    }
1873
1874
    /* If "none of the above", give up */
1875
660
    return(NULL);
1876
667
}
1877
1878
/************************************************************************
1879
 *                  *
1880
 *    ICONV based generic conversion functions    *
1881
 *                  *
1882
 ************************************************************************/
1883
1884
#ifdef LIBXML_ICONV_ENABLED
1885
/**
 * xmlIconvWrapper:
 * @cd:   iconv converter data structure
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of input bytes
 * @inlen:  the length of @in
 *
 * Run a single iconv() call converting the bytes of @in into @out and
 * translate its outcome into the return codes used by the encoding
 * handlers.
 *
 * Returns 0 if success, or
 *     -1 by lack of space, or if an argument is NULL or a length is
 *        negative, or
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
 *        the result of transformation can't fit into the encoding we want), or
 *     -3 if the last bytes of the input can't form a complete character,
 *        or for any other conversion failure.
 *
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    size_t icv_inlen, icv_outlen;
    const char *icv_in = (const char *) in;
    char *icv_out = (char *) out;
    size_t ret;
    int err = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
        if (outlen != NULL) *outlen = 0;
        return(-1);
    }
    /*
     * A negative length would turn into a huge size_t below and let
     * iconv() run far past the end of the buffers.
     */
    if ((*inlen < 0) || (*outlen < 0)) {
        *inlen = 0;
        *outlen = 0;
        return(-1);
    }
    icv_inlen = (size_t) *inlen;
    icv_outlen = (size_t) *outlen;
    /*
     * Some versions take const, other versions take non-const input.
     */
    ret = iconv(cd, (void *) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
    /*
     * errno is only meaningful when iconv() reported a failure; take a
     * snapshot right away so that nothing else can overwrite it.
     */
    if (ret == (size_t) -1)
        err = errno;
    *inlen -= (int) icv_inlen;
    *outlen -= (int) icv_outlen;

    if (ret == (size_t) -1) {
#ifdef EILSEQ
        if (err == EILSEQ)
            return(-2);
#endif
#ifdef E2BIG
        if (err == E2BIG)
            return(-1);
#endif
        /* EINVAL (truncated input sequence) and anything unexpected. */
        return(-3);
    }
    /*
     * No error was reported but some input is left over: report it as
     * an incomplete conversion instead of consulting a stale errno.
     */
    if (icv_inlen != 0)
        return(-3);
    return(0);
}
1945
#endif /* LIBXML_ICONV_ENABLED */
1946
1947
/************************************************************************
1948
 *                  *
1949
 *    ICU based generic conversion functions    *
1950
 *                  *
1951
 ************************************************************************/
1952
1953
#ifdef LIBXML_ICU_ENABLED
1954
/**
1955
 * xmlUconvWrapper:
1956
 * @cd: ICU uconverter data structure
1957
 * @toUnicode : non-zero if toUnicode. 0 otherwise.
1958
 * @out:  a pointer to an array of bytes to store the result
1959
 * @outlen:  the length of @out
1960
 * @in:  a pointer to an array of input bytes
1961
 * @inlen:  the length of @in
1962
 * @flush: if true, indicates end of input
1963
 *
1964
 * Returns 0 if success, or
1965
 *     -1 by lack of space, or
1966
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
1967
 *        the result of transformation can't fit into the encoding we want), or
1968
 *     -3 if there the last byte can't form a single output char.
1969
 *
1970
 * The value of @inlen after return is the number of octets consumed
1971
 *     as the return value is positive, else unpredictable.
1972
 * The value of @outlen after return is the number of octets produced.
1973
 */
1974
static int
1975
xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
1976
                const unsigned char *in, int *inlen, int flush) {
1977
    const char *ucv_in = (const char *) in;
1978
    char *ucv_out = (char *) out;
1979
    UErrorCode err = U_ZERO_ERROR;
1980
1981
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1982
        if (outlen != NULL) *outlen = 0;
1983
        return(-1);
1984
    }
1985
1986
    if (toUnicode) {
1987
        /* encoding => UTF-16 => UTF-8 */
1988
        ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
1989
                       &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1990
                       &cd->pivot_source, &cd->pivot_target,
1991
                       cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, 0, &err);
1992
    } else {
1993
        /* UTF-8 => UTF-16 => encoding */
1994
        ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
1995
                       &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1996
                       &cd->pivot_source, &cd->pivot_target,
1997
                       cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, 0, &err);
1998
    }
1999
    *inlen = ucv_in - (const char*) in;
2000
    *outlen = ucv_out - (char *) out;
2001
    if (U_SUCCESS(err))
2002
        return 0;
2003
    if (err == U_BUFFER_OVERFLOW_ERROR)
2004
        return -1;
2005
    if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
2006
        return -2;
2007
    return -3;
2008
}
2009
#endif /* LIBXML_ICU_ENABLED */
2010
2011
/************************************************************************
2012
 *                  *
2013
 *    The real API used by libxml for on-the-fly conversion *
2014
 *                  *
2015
 ************************************************************************/
2016
2017
/**
2018
 * xmlEncInputChunk:
2019
 * @handler:  encoding handler
2020
 * @out:  a pointer to an array of bytes to store the result
2021
 * @outlen:  the length of @out
2022
 * @in:  a pointer to an array of input bytes
2023
 * @inlen:  the length of @in
2024
 * @flush:  flush (ICU-related)
2025
 *
2026
 * Returns 0 if success, or
2027
 *     -1 by lack of space, or
2028
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2029
 *        the result of transformation can't fit into the encoding we want), or
2030
 *     -3 if there the last byte can't form a single output char.
2031
 *
2032
 * The value of @inlen after return is the number of octets consumed
2033
 *     as the return value is 0, else unpredictable.
2034
 * The value of @outlen after return is the number of octets produced.
2035
 */
2036
int
2037
xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2038
19.5k
                 int *outlen, const unsigned char *in, int *inlen, int flush) {
2039
19.5k
    int ret;
2040
19.5k
    (void)flush;
2041
2042
19.5k
    if (handler->input != NULL) {
2043
5.23k
        ret = handler->input(out, outlen, in, inlen);
2044
5.23k
        if (ret > 0)
2045
3.08k
           ret = 0;
2046
5.23k
    }
2047
14.2k
#ifdef LIBXML_ICONV_ENABLED
2048
14.2k
    else if (handler->iconv_in != NULL) {
2049
14.2k
        ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
2050
14.2k
    }
2051
1
#endif /* LIBXML_ICONV_ENABLED */
2052
#ifdef LIBXML_ICU_ENABLED
2053
    else if (handler->uconv_in != NULL) {
2054
        ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen,
2055
                              flush);
2056
    }
2057
#endif /* LIBXML_ICU_ENABLED */
2058
1
    else {
2059
1
        *outlen = 0;
2060
1
        *inlen = 0;
2061
1
        ret = -2;
2062
1
    }
2063
2064
19.5k
    return(ret);
2065
19.5k
}
2066
2067
/**
2068
 * xmlEncOutputChunk:
2069
 * @handler:  encoding handler
2070
 * @out:  a pointer to an array of bytes to store the result
2071
 * @outlen:  the length of @out
2072
 * @in:  a pointer to an array of input bytes
2073
 * @inlen:  the length of @in
2074
 *
2075
 * Returns 0 if success, or
2076
 *     -1 by lack of space, or
2077
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2078
 *        the result of transformation can't fit into the encoding we want), or
2079
 *     -3 if there the last byte can't form a single output char.
2080
 *     -4 if no output function was found.
2081
 *
2082
 * The value of @inlen after return is the number of octets consumed
2083
 *     as the return value is 0, else unpredictable.
2084
 * The value of @outlen after return is the number of octets produced.
2085
 */
2086
static int
2087
xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2088
0
                  int *outlen, const unsigned char *in, int *inlen) {
2089
0
    int ret;
2090
2091
0
    if (handler->output != NULL) {
2092
0
        ret = handler->output(out, outlen, in, inlen);
2093
0
        if (ret > 0)
2094
0
           ret = 0;
2095
0
    }
2096
0
#ifdef LIBXML_ICONV_ENABLED
2097
0
    else if (handler->iconv_out != NULL) {
2098
0
        ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
2099
0
    }
2100
0
#endif /* LIBXML_ICONV_ENABLED */
2101
#ifdef LIBXML_ICU_ENABLED
2102
    else if (handler->uconv_out != NULL) {
2103
        ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen,
2104
                              1);
2105
    }
2106
#endif /* LIBXML_ICU_ENABLED */
2107
0
    else {
2108
0
        *outlen = 0;
2109
0
        *inlen = 0;
2110
0
        ret = -4;
2111
0
    }
2112
2113
0
    return(ret);
2114
0
}
2115
2116
/**
2117
 * xmlCharEncFirstLine:
2118
 * @handler:  char encoding transformation data structure
2119
 * @out:  an xmlBuffer for the output.
2120
 * @in:  an xmlBuffer for the input
2121
 *
2122
 * DEPERECATED: Don't use.
2123
 */
2124
int
2125
xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2126
0
                    xmlBufferPtr in) {
2127
0
    return(xmlCharEncInFunc(handler, out, in));
2128
0
}
2129
2130
/**
2131
 * xmlCharEncInput:
2132
 * @input: a parser input buffer
2133
 * @flush: try to flush all the raw buffer
2134
 *
2135
 * Generic front-end for the encoding handler on parser input
2136
 *
2137
 * Returns the number of byte written if success, or
2138
 *     -1 general error
2139
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2140
 *        the result of transformation can't fit into the encoding we want), or
2141
 */
2142
int
2143
xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
2144
19.5k
{
2145
19.5k
    int ret;
2146
19.5k
    size_t written;
2147
19.5k
    size_t toconv;
2148
19.5k
    int c_in;
2149
19.5k
    int c_out;
2150
19.5k
    xmlBufPtr in;
2151
19.5k
    xmlBufPtr out;
2152
2153
19.5k
    if ((input == NULL) || (input->encoder == NULL) ||
2154
19.5k
        (input->buffer == NULL) || (input->raw == NULL))
2155
0
        return (-1);
2156
19.5k
    out = input->buffer;
2157
19.5k
    in = input->raw;
2158
2159
19.5k
    toconv = xmlBufUse(in);
2160
19.5k
    if (toconv == 0)
2161
197
        return (0);
2162
19.3k
    if ((toconv > 64 * 1024) && (flush == 0))
2163
0
        toconv = 64 * 1024;
2164
19.3k
    written = xmlBufAvail(out);
2165
19.3k
    if (toconv * 2 >= written) {
2166
2.67k
        if (xmlBufGrow(out, toconv * 2) < 0)
2167
0
            return (-1);
2168
2.67k
        written = xmlBufAvail(out);
2169
2.67k
    }
2170
19.3k
    if ((written > 128 * 1024) && (flush == 0))
2171
0
        written = 128 * 1024;
2172
2173
19.3k
    c_in = toconv;
2174
19.3k
    c_out = written;
2175
19.3k
    ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2176
19.3k
                           xmlBufContent(in), &c_in, flush);
2177
19.3k
    xmlBufShrink(in, c_in);
2178
19.3k
    xmlBufAddLen(out, c_out);
2179
19.3k
    if (ret == -1)
2180
1.93k
        ret = -3;
2181
2182
19.3k
    switch (ret) {
2183
16.9k
        case 0:
2184
#ifdef DEBUG_ENCODING
2185
            xmlGenericError(xmlGenericErrorContext,
2186
                            "converted %d bytes to %d bytes of input\n",
2187
                            c_in, c_out);
2188
#endif
2189
16.9k
            break;
2190
0
        case -1:
2191
#ifdef DEBUG_ENCODING
2192
            xmlGenericError(xmlGenericErrorContext,
2193
                         "converted %d bytes to %d bytes of input, %d left\n",
2194
                            c_in, c_out, (int)xmlBufUse(in));
2195
#endif
2196
0
            break;
2197
2.14k
        case -3:
2198
#ifdef DEBUG_ENCODING
2199
            xmlGenericError(xmlGenericErrorContext,
2200
                        "converted %d bytes to %d bytes of input, %d left\n",
2201
                            c_in, c_out, (int)xmlBufUse(in));
2202
#endif
2203
2.14k
            break;
2204
299
        case -2: {
2205
299
            char buf[50];
2206
299
            const xmlChar *content = xmlBufContent(in);
2207
2208
299
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2209
299
         content[0], content[1],
2210
299
         content[2], content[3]);
2211
299
      buf[49] = 0;
2212
299
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2213
299
        "input conversion failed due to input error, bytes %s\n",
2214
299
               buf);
2215
299
        }
2216
19.3k
    }
2217
    /*
2218
     * Ignore when input buffer is not on a boundary
2219
     */
2220
19.3k
    if (ret == -3)
2221
2.14k
        ret = 0;
2222
19.3k
    return (c_out? c_out : ret);
2223
19.3k
}
2224
2225
/**
2226
 * xmlCharEncInFunc:
2227
 * @handler:  char encoding transformation data structure
2228
 * @out:  an xmlBuffer for the output.
2229
 * @in:  an xmlBuffer for the input
2230
 *
2231
 * Generic front-end for the encoding handler input function
2232
 *
2233
 * Returns the number of byte written if success, or
2234
 *     -1 general error
2235
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2236
 *        the result of transformation can't fit into the encoding we want), or
2237
 */
2238
int
2239
xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2240
                 xmlBufferPtr in)
2241
0
{
2242
0
    int ret;
2243
0
    int written;
2244
0
    int toconv;
2245
2246
0
    if (handler == NULL)
2247
0
        return (-1);
2248
0
    if (out == NULL)
2249
0
        return (-1);
2250
0
    if (in == NULL)
2251
0
        return (-1);
2252
2253
0
    toconv = in->use;
2254
0
    if (toconv == 0)
2255
0
        return (0);
2256
0
    written = out->size - out->use -1; /* count '\0' */
2257
0
    if (toconv * 2 >= written) {
2258
0
        xmlBufferGrow(out, out->size + toconv * 2);
2259
0
        written = out->size - out->use - 1;
2260
0
    }
2261
0
    ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2262
0
                           in->content, &toconv, 1);
2263
0
    xmlBufferShrink(in, toconv);
2264
0
    out->use += written;
2265
0
    out->content[out->use] = 0;
2266
0
    if (ret == -1)
2267
0
        ret = -3;
2268
2269
0
    switch (ret) {
2270
0
        case 0:
2271
#ifdef DEBUG_ENCODING
2272
            xmlGenericError(xmlGenericErrorContext,
2273
                            "converted %d bytes to %d bytes of input\n",
2274
                            toconv, written);
2275
#endif
2276
0
            break;
2277
0
        case -1:
2278
#ifdef DEBUG_ENCODING
2279
            xmlGenericError(xmlGenericErrorContext,
2280
                         "converted %d bytes to %d bytes of input, %d left\n",
2281
                            toconv, written, in->use);
2282
#endif
2283
0
            break;
2284
0
        case -3:
2285
#ifdef DEBUG_ENCODING
2286
            xmlGenericError(xmlGenericErrorContext,
2287
                        "converted %d bytes to %d bytes of input, %d left\n",
2288
                            toconv, written, in->use);
2289
#endif
2290
0
            break;
2291
0
        case -2: {
2292
0
            char buf[50];
2293
2294
0
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2295
0
         in->content[0], in->content[1],
2296
0
         in->content[2], in->content[3]);
2297
0
      buf[49] = 0;
2298
0
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2299
0
        "input conversion failed due to input error, bytes %s\n",
2300
0
               buf);
2301
0
        }
2302
0
    }
2303
    /*
2304
     * Ignore when input buffer is not on a boundary
2305
     */
2306
0
    if (ret == -3)
2307
0
        ret = 0;
2308
0
    return (written? written : ret);
2309
0
}
2310
2311
#ifdef LIBXML_OUTPUT_ENABLED
2312
/**
2313
 * xmlCharEncOutput:
2314
 * @output: a parser output buffer
2315
 * @init: is this an initialization call without data
2316
 *
2317
 * Generic front-end for the encoding handler on parser output
2318
 * a first call with @init == 1 has to be made first to initiate the
2319
 * output in case of non-stateless encoding needing to initiate their
2320
 * state or the output (like the BOM in UTF16).
2321
 * In case of UTF8 sequence conversion errors for the given encoder,
2322
 * the content will be automatically remapped to a CharRef sequence.
2323
 *
2324
 * Returns the number of byte written if success, or
2325
 *     -1 general error
2326
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2327
 *        the result of transformation can't fit into the encoding we want), or
2328
 */
2329
int
2330
xmlCharEncOutput(xmlOutputBufferPtr output, int init)
2331
0
{
2332
0
    int ret;
2333
0
    size_t written;
2334
0
    int writtentot = 0;
2335
0
    size_t toconv;
2336
0
    int c_in;
2337
0
    int c_out;
2338
0
    xmlBufPtr in;
2339
0
    xmlBufPtr out;
2340
2341
0
    if ((output == NULL) || (output->encoder == NULL) ||
2342
0
        (output->buffer == NULL) || (output->conv == NULL))
2343
0
        return (-1);
2344
0
    out = output->conv;
2345
0
    in = output->buffer;
2346
2347
0
retry:
2348
2349
0
    written = xmlBufAvail(out);
2350
2351
    /*
2352
     * First specific handling of the initialization call
2353
     */
2354
0
    if (init) {
2355
0
        c_in = 0;
2356
0
        c_out = written;
2357
        /* TODO: Check return value. */
2358
0
        xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2359
0
                          NULL, &c_in);
2360
0
        xmlBufAddLen(out, c_out);
2361
#ifdef DEBUG_ENCODING
2362
  xmlGenericError(xmlGenericErrorContext,
2363
    "initialized encoder\n");
2364
#endif
2365
0
        return(c_out);
2366
0
    }
2367
2368
    /*
2369
     * Conversion itself.
2370
     */
2371
0
    toconv = xmlBufUse(in);
2372
0
    if (toconv == 0)
2373
0
        return (writtentot);
2374
0
    if (toconv > 64 * 1024)
2375
0
        toconv = 64 * 1024;
2376
0
    if (toconv * 4 >= written) {
2377
0
        xmlBufGrow(out, toconv * 4);
2378
0
        written = xmlBufAvail(out);
2379
0
    }
2380
0
    if (written > 256 * 1024)
2381
0
        written = 256 * 1024;
2382
2383
0
    c_in = toconv;
2384
0
    c_out = written;
2385
0
    ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2386
0
                            xmlBufContent(in), &c_in);
2387
0
    xmlBufShrink(in, c_in);
2388
0
    xmlBufAddLen(out, c_out);
2389
0
    writtentot += c_out;
2390
0
    if (ret == -1) {
2391
0
        if (c_out > 0) {
2392
            /* Can be a limitation of iconv or uconv */
2393
0
            goto retry;
2394
0
        }
2395
0
        ret = -3;
2396
0
    }
2397
2398
    /*
2399
     * Attempt to handle error cases
2400
     */
2401
0
    switch (ret) {
2402
0
        case 0:
2403
#ifdef DEBUG_ENCODING
2404
      xmlGenericError(xmlGenericErrorContext,
2405
        "converted %d bytes to %d bytes of output\n",
2406
              c_in, c_out);
2407
#endif
2408
0
      break;
2409
0
        case -1:
2410
#ifdef DEBUG_ENCODING
2411
      xmlGenericError(xmlGenericErrorContext,
2412
        "output conversion failed by lack of space\n");
2413
#endif
2414
0
      break;
2415
0
        case -3:
2416
#ifdef DEBUG_ENCODING
2417
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2418
              c_in, c_out, (int) xmlBufUse(in));
2419
#endif
2420
0
      break;
2421
0
        case -4:
2422
0
            xmlEncodingErr(XML_I18N_NO_OUTPUT,
2423
0
                           "xmlCharEncOutFunc: no output function !\n", NULL);
2424
0
            ret = -1;
2425
0
            break;
2426
0
        case -2: {
2427
0
      xmlChar charref[20];
2428
0
      int len = xmlBufUse(in);
2429
0
            xmlChar *content = xmlBufContent(in);
2430
0
      int cur, charrefLen;
2431
2432
0
      cur = xmlGetUTF8Char(content, &len);
2433
0
      if (cur <= 0)
2434
0
                break;
2435
2436
#ifdef DEBUG_ENCODING
2437
            xmlGenericError(xmlGenericErrorContext,
2438
                    "handling output conversion error\n");
2439
            xmlGenericError(xmlGenericErrorContext,
2440
                    "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2441
                    content[0], content[1],
2442
                    content[2], content[3]);
2443
#endif
2444
            /*
2445
             * Removes the UTF8 sequence, and replace it by a charref
2446
             * and continue the transcoding phase, hoping the error
2447
             * did not mangle the encoder state.
2448
             */
2449
0
            charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2450
0
                             "&#%d;", cur);
2451
0
            xmlBufShrink(in, len);
2452
0
            xmlBufGrow(out, charrefLen * 4);
2453
0
            c_out = xmlBufAvail(out);
2454
0
            c_in = charrefLen;
2455
0
            ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2456
0
                                    charref, &c_in);
2457
2458
0
      if ((ret < 0) || (c_in != charrefLen)) {
2459
0
    char buf[50];
2460
2461
0
    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2462
0
       content[0], content[1],
2463
0
       content[2], content[3]);
2464
0
    buf[49] = 0;
2465
0
    xmlEncodingErr(XML_I18N_CONV_FAILED,
2466
0
        "output conversion failed due to conv error, bytes %s\n",
2467
0
             buf);
2468
0
    content[0] = ' ';
2469
0
                break;
2470
0
      }
2471
2472
0
            xmlBufAddLen(out, c_out);
2473
0
            writtentot += c_out;
2474
0
            goto retry;
2475
0
  }
2476
0
    }
2477
0
    return(writtentot ? writtentot : ret);
2478
0
}
2479
#endif
2480
2481
/**
2482
 * xmlCharEncOutFunc:
2483
 * @handler:  char encoding transformation data structure
2484
 * @out:  an xmlBuffer for the output.
2485
 * @in:  an xmlBuffer for the input
2486
 *
2487
 * Generic front-end for the encoding handler output function
2488
 * a first call with @in == NULL has to be made firs to initiate the
2489
 * output in case of non-stateless encoding needing to initiate their
2490
 * state or the output (like the BOM in UTF16).
2491
 * In case of UTF8 sequence conversion errors for the given encoder,
2492
 * the content will be automatically remapped to a CharRef sequence.
2493
 *
2494
 * Returns the number of byte written if success, or
2495
 *     -1 general error
2496
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2497
 *        the result of transformation can't fit into the encoding we want), or
2498
 */
2499
int
2500
xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2501
0
                  xmlBufferPtr in) {
2502
0
    int ret;
2503
0
    int written;
2504
0
    int writtentot = 0;
2505
0
    int toconv;
2506
2507
0
    if (handler == NULL) return(-1);
2508
0
    if (out == NULL) return(-1);
2509
2510
0
retry:
2511
2512
0
    written = out->size - out->use;
2513
2514
0
    if (written > 0)
2515
0
  written--; /* Gennady: count '/0' */
2516
2517
    /*
2518
     * First specific handling of in = NULL, i.e. the initialization call
2519
     */
2520
0
    if (in == NULL) {
2521
0
        toconv = 0;
2522
        /* TODO: Check return value. */
2523
0
        xmlEncOutputChunk(handler, &out->content[out->use], &written,
2524
0
                          NULL, &toconv);
2525
0
        out->use += written;
2526
0
        out->content[out->use] = 0;
2527
#ifdef DEBUG_ENCODING
2528
  xmlGenericError(xmlGenericErrorContext,
2529
    "initialized encoder\n");
2530
#endif
2531
0
        return(0);
2532
0
    }
2533
2534
    /*
2535
     * Conversion itself.
2536
     */
2537
0
    toconv = in->use;
2538
0
    if (toconv == 0)
2539
0
  return(0);
2540
0
    if (toconv * 4 >= written) {
2541
0
        xmlBufferGrow(out, toconv * 4);
2542
0
  written = out->size - out->use - 1;
2543
0
    }
2544
0
    ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2545
0
                            in->content, &toconv);
2546
0
    xmlBufferShrink(in, toconv);
2547
0
    out->use += written;
2548
0
    writtentot += written;
2549
0
    out->content[out->use] = 0;
2550
0
    if (ret == -1) {
2551
0
        if (written > 0) {
2552
            /* Can be a limitation of iconv or uconv */
2553
0
            goto retry;
2554
0
        }
2555
0
        ret = -3;
2556
0
    }
2557
2558
    /*
2559
     * Attempt to handle error cases
2560
     */
2561
0
    switch (ret) {
2562
0
        case 0:
2563
#ifdef DEBUG_ENCODING
2564
      xmlGenericError(xmlGenericErrorContext,
2565
        "converted %d bytes to %d bytes of output\n",
2566
              toconv, written);
2567
#endif
2568
0
      break;
2569
0
        case -1:
2570
#ifdef DEBUG_ENCODING
2571
      xmlGenericError(xmlGenericErrorContext,
2572
        "output conversion failed by lack of space\n");
2573
#endif
2574
0
      break;
2575
0
        case -3:
2576
#ifdef DEBUG_ENCODING
2577
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2578
              toconv, written, in->use);
2579
#endif
2580
0
      break;
2581
0
        case -4:
2582
0
      xmlEncodingErr(XML_I18N_NO_OUTPUT,
2583
0
               "xmlCharEncOutFunc: no output function !\n", NULL);
2584
0
      ret = -1;
2585
0
            break;
2586
0
        case -2: {
2587
0
      xmlChar charref[20];
2588
0
      int len = in->use;
2589
0
      const xmlChar *utf = (const xmlChar *) in->content;
2590
0
      int cur, charrefLen;
2591
2592
0
      cur = xmlGetUTF8Char(utf, &len);
2593
0
      if (cur <= 0)
2594
0
                break;
2595
2596
#ifdef DEBUG_ENCODING
2597
            xmlGenericError(xmlGenericErrorContext,
2598
                    "handling output conversion error\n");
2599
            xmlGenericError(xmlGenericErrorContext,
2600
                    "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2601
                    in->content[0], in->content[1],
2602
                    in->content[2], in->content[3]);
2603
#endif
2604
            /*
2605
             * Removes the UTF8 sequence, and replace it by a charref
2606
             * and continue the transcoding phase, hoping the error
2607
             * did not mangle the encoder state.
2608
             */
2609
0
            charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2610
0
                             "&#%d;", cur);
2611
0
            xmlBufferShrink(in, len);
2612
0
            xmlBufferGrow(out, charrefLen * 4);
2613
0
      written = out->size - out->use - 1;
2614
0
            toconv = charrefLen;
2615
0
            ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2616
0
                                    charref, &toconv);
2617
2618
0
      if ((ret < 0) || (toconv != charrefLen)) {
2619
0
    char buf[50];
2620
2621
0
    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2622
0
       in->content[0], in->content[1],
2623
0
       in->content[2], in->content[3]);
2624
0
    buf[49] = 0;
2625
0
    xmlEncodingErr(XML_I18N_CONV_FAILED,
2626
0
        "output conversion failed due to conv error, bytes %s\n",
2627
0
             buf);
2628
0
    in->content[0] = ' ';
2629
0
          break;
2630
0
      }
2631
2632
0
            out->use += written;
2633
0
            writtentot += written;
2634
0
            out->content[out->use] = 0;
2635
0
            goto retry;
2636
0
  }
2637
0
    }
2638
0
    return(writtentot ? writtentot : ret);
2639
0
}
2640
2641
/**
2642
 * xmlCharEncCloseFunc:
2643
 * @handler:  char encoding transformation data structure
2644
 *
2645
 * Generic front-end for encoding handler close function
2646
 *
2647
 * Returns 0 if success, or -1 in case of error
2648
 */
2649
int
2650
1.56k
xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2651
1.56k
    int ret = 0;
2652
1.56k
    int tofree = 0;
2653
1.56k
    int i = 0;
2654
2655
1.56k
    if (handler == NULL) return(-1);
2656
2657
10.3k
    for (i = 0; i < (int) NUM_DEFAULT_HANDLERS; i++) {
2658
9.43k
        if (handler == &defaultHandlers[i])
2659
595
            return(0);
2660
9.43k
    }
2661
2662
967
    if (handlers != NULL) {
2663
0
        for (i = 0;i < nbCharEncodingHandler; i++) {
2664
0
            if (handler == handlers[i])
2665
0
                return(0);
2666
0
  }
2667
0
    }
2668
967
#ifdef LIBXML_ICONV_ENABLED
2669
    /*
2670
     * Iconv handlers can be used only once, free the whole block.
2671
     * and the associated icon resources.
2672
     */
2673
967
    if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
2674
967
        tofree = 1;
2675
967
  if (handler->iconv_out != NULL) {
2676
967
      if (iconv_close(handler->iconv_out))
2677
0
    ret = -1;
2678
967
      handler->iconv_out = NULL;
2679
967
  }
2680
967
  if (handler->iconv_in != NULL) {
2681
967
      if (iconv_close(handler->iconv_in))
2682
0
    ret = -1;
2683
967
      handler->iconv_in = NULL;
2684
967
  }
2685
967
    }
2686
967
#endif /* LIBXML_ICONV_ENABLED */
2687
#ifdef LIBXML_ICU_ENABLED
2688
    if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) {
2689
        tofree = 1;
2690
  if (handler->uconv_out != NULL) {
2691
      closeIcuConverter(handler->uconv_out);
2692
      handler->uconv_out = NULL;
2693
  }
2694
  if (handler->uconv_in != NULL) {
2695
      closeIcuConverter(handler->uconv_in);
2696
      handler->uconv_in = NULL;
2697
  }
2698
    }
2699
#endif
2700
967
    if (tofree) {
2701
        /* free up only dynamic handlers iconv/uconv */
2702
967
        if (handler->name != NULL)
2703
967
            xmlFree(handler->name);
2704
967
        handler->name = NULL;
2705
967
        xmlFree(handler);
2706
967
    }
2707
#ifdef DEBUG_ENCODING
2708
    if (ret)
2709
        xmlGenericError(xmlGenericErrorContext,
2710
    "failed to close the encoding handler\n");
2711
    else
2712
        xmlGenericError(xmlGenericErrorContext,
2713
    "closed the encoding handler\n");
2714
#endif
2715
2716
967
    return(ret);
2717
967
}
2718
2719
/**
2720
 * xmlByteConsumed:
2721
 * @ctxt: an XML parser context
2722
 *
2723
 * This function provides the current index of the parser relative
2724
 * to the start of the current entity. This function is computed in
2725
 * bytes from the beginning starting at zero and finishing at the
2726
 * size in byte of the file if parsing a file. The function is
2727
 * of constant cost if the input is UTF-8 but can be costly if run
2728
 * on non-UTF-8 input.
2729
 *
2730
 * Returns the index in bytes from the beginning of the entity or -1
2731
 *         in case the index could not be computed.
2732
 */
2733
long
2734
0
xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2735
0
    xmlParserInputPtr in;
2736
2737
0
    if (ctxt == NULL) return(-1);
2738
0
    in = ctxt->input;
2739
0
    if (in == NULL)  return(-1);
2740
0
    if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2741
0
        unsigned int unused = 0;
2742
0
  xmlCharEncodingHandler * handler = in->buf->encoder;
2743
        /*
2744
   * Encoding conversion, compute the number of unused original
2745
   * bytes from the input not consumed and subtract that from
2746
   * the raw consumed value, this is not a cheap operation
2747
   */
2748
0
        if (in->end - in->cur > 0) {
2749
0
      unsigned char convbuf[32000];
2750
0
      const unsigned char *cur = (const unsigned char *)in->cur;
2751
0
      int toconv = in->end - in->cur, written = 32000;
2752
2753
0
      int ret;
2754
2755
0
            do {
2756
0
                toconv = in->end - cur;
2757
0
                written = 32000;
2758
0
                ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2759
0
                                        cur, &toconv);
2760
0
                if (ret < 0) {
2761
0
                    if (written > 0)
2762
0
                        ret = -2;
2763
0
                    else
2764
0
                        return(-1);
2765
0
                }
2766
0
                unused += written;
2767
0
                cur += toconv;
2768
0
            } while (ret == -2);
2769
0
  }
2770
0
  if (in->buf->rawconsumed < unused)
2771
0
      return(-1);
2772
0
  return(in->buf->rawconsumed - unused);
2773
0
    }
2774
0
    return(in->consumed + (in->cur - in->base));
2775
0
}
2776
2777
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2778
#ifdef LIBXML_ISO8859X_ENABLED
2779
2780
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. Every converted character yields exactly one output
 * byte; conversion stops early, without error, once @out is full.
 *
 * Layout of @xlattable as used below: entries 0..31 are indexed by the low
 * five bits of a 2-byte lead byte, entries 32..47 by the low four bits of a
 * 3-byte lead byte; each selects a 64-byte block located at offset
 * 48 + 64 * block. A final lookup result of 0 means "not in character set".
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 * fails (malformed UTF-8 or a character that has no mapping), -3 if a
 * multi-byte sequence is cut off at the end of @in, or -1 on invalid
 * arguments.
 * The value of @inlen after return is the number of octets consumed
 *     (only complete, successfully converted characters are counted).
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              const unsigned char* const xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;
    int ret = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
        * initialization nothing to do
        */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    outend = out + (*outlen);
    inend = in + (*inlen);
    while (in < inend) {
        unsigned char d;

        /* never write past the space announced by the caller */
        if (out >= outend)
            break;

        d = *in++;
        if (d < 0x80) {
            *out++ = d;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            goto done;
        } else if (d < 0xE0) {
            unsigned char c;

            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                ret = -3;
                goto done;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                ret = -2;
                goto done;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                goto done;
            }
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;

            if (!(in < inend - 1)) {
                /* trailing bytes not in input buffer */
                ret = -3;
                goto done;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                ret = -2;
                goto done;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                ret = -2;
                goto done;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                           xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                goto done;
            }
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            ret = -2;
            goto done;
        }
        /* the whole character was converted: commit the input position */
        processed = in;
    }

done:
    *outlen = out - outstart;
    *inlen = processed - instart;
    return((ret < 0) ? ret : *outlen);
}
2898
2899
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable:  128 code points for the bytes 0x80..0xFF, indexed by
 *                 (byte - 0x80); an entry of 0 marks an undefined byte
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. Conversion stops without error when @out cannot
 * hold the next character.
 *
 * Returns the number of bytes written if success, or -1 on invalid
 * arguments or when an undefined input byte is met
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* instop;
    unsigned int c;
    int ret = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    instop = inend;

    /*
     * Main loop: runs while at least three output bytes are free, which
     * is the longest sequence a table entry can expand to. The test is
     * written as a difference so that no pointer before @out is formed
     * when fewer than two bytes are available.
     */
    while ((in < inend) && ((outend - out) > 2)) {
        if (*in >= 0x80) {
            c = unicodetable [*in - 0x80];
            if (c == 0) {
                /* undefined code point */
                ret = -1;
                goto done;
            }
            if (c < 0x800) {
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
            ++in;
        }
        /* limit the ASCII run to what still fits in the output */
        if ((instop - in) > (outend - out))
            instop = in + (outend - out);
        /* check the bound before dereferencing: in may equal inend here */
        while ((in < instop) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }

    /*
     * Fewer than three output bytes remain (or the input is exhausted):
     * plain ASCII bytes still fit one by one, so copy what is left of them.
     */
    while ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ = *in++;
    }

done:
    *outlen = out - outstart;
    *inlen = in - instart;
    return((ret < 0) ? ret : *outlen);
}
2964
2965
2966
/************************************************************************
2967
 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
2968
 ************************************************************************/
2969
2970
/*
 * ISO-8859-2: table of 128 16-bit values, eight per row.
 * NOTE(review): presumably passed as the unicodetable argument of
 * ISO8859xToUTF8(), which indexes it with (byte - 0x80) and treats a
 * 0x0000 entry as an undefined byte -- confirm at the call site.
 */
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
2971
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2972
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2973
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2974
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2975
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
2976
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
2977
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
2978
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
2979
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
2980
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
2981
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
2982
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
2983
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
2984
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
2985
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
2986
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
2987
};
2988
2989
/*
 * ISO-8859-2: byte table sized as 48 index bytes plus 6 blocks of 64 bytes
 * (see the array bound).
 * NOTE(review): presumably passed as the xlattable argument of
 * UTF8ToISO8859x(), which reads entries 0..31 by the low 5 bits of a
 * 2-byte UTF-8 lead byte, entries 32..47 by the low 4 bits of a 3-byte
 * lead byte, and treats a final 0x00 as "not in character set" -- confirm
 * at the call site.
 */
static const unsigned char xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
2990
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
2991
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2992
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2993
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2994
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2995
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2996
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2997
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2998
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2999
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3000
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3001
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
3002
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
3003
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3004
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
3005
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3006
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
3007
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3008
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3009
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
3010
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
3011
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
3012
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
3013
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3014
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
3015
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
3016
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
3017
};
3018
3019
/*
 * ISO-8859-3: table of 128 16-bit values, eight per row.
 * NOTE(review): presumably passed as the unicodetable argument of
 * ISO8859xToUTF8(), which indexes it with (byte - 0x80) and treats a
 * 0x0000 entry as an undefined byte -- confirm at the call site.
 */
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
3020
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3021
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3022
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3023
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3024
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
3025
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
3026
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
3027
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
3028
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
3029
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3030
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
3031
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
3032
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
3033
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3034
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
3035
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
3036
};
3037
3038
/*
 * ISO-8859-3: byte table sized as 48 index bytes plus 7 blocks of 64 bytes
 * (see the array bound).
 * NOTE(review): presumably passed as the xlattable argument of
 * UTF8ToISO8859x(), which reads entries 0..31 by the low 5 bits of a
 * 2-byte UTF-8 lead byte, entries 32..47 by the low 4 bits of a 3-byte
 * lead byte, and treats a final 0x00 as "not in character set" -- confirm
 * at the call site.
 */
static const unsigned char xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
3039
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3040
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3041
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3042
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3043
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3044
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3045
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3046
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3047
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3048
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3049
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
3050
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
3051
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
3052
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
3053
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3054
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3055
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
3056
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3057
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3058
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3059
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3060
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3061
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3062
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3063
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
3064
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
3065
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
3066
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3067
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3068
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3069
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
3070
};
3071
3072
/*
 * ISO-8859-4: table of 128 16-bit values, eight per row.
 * NOTE(review): presumably passed as the unicodetable argument of
 * ISO8859xToUTF8(), which indexes it with (byte - 0x80) and treats a
 * 0x0000 entry as an undefined byte -- confirm at the call site.
 */
static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
3073
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3074
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3075
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3076
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3077
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
3078
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
3079
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
3080
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
3081
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3082
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
3083
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3084
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
3085
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3086
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
3087
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3088
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
3089
};
3090
3091
/*
 * ISO-8859-4: byte table sized as 48 index bytes plus 6 blocks of 64 bytes
 * (see the array bound).
 * NOTE(review): presumably passed as the xlattable argument of
 * UTF8ToISO8859x(), which reads entries 0..31 by the low 5 bits of a
 * 2-byte UTF-8 lead byte, entries 32..47 by the low 4 bits of a 3-byte
 * lead byte, and treats a final 0x00 as "not in character set" -- confirm
 * at the call site.
 */
static const unsigned char xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
3092
    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
3093
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3094
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3095
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3096
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3097
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3098
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3099
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3100
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3101
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
3102
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3103
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3104
    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3105
    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
3106
    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
3107
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
3108
    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
3109
    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
3110
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
3111
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3112
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
3113
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3114
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3115
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3116
    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
3117
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
3118
    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
3119
};
3120
3121
/*
 * ISO-8859-5: table of 128 16-bit values, eight per row.
 * NOTE(review): presumably passed as the unicodetable argument of
 * ISO8859xToUTF8(), which indexes it with (byte - 0x80) and treats a
 * 0x0000 entry as an undefined byte -- confirm at the call site.
 */
static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
3122
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3123
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3124
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3125
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3126
    0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
3127
    0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
3128
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
3129
    0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
3130
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
3131
    0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
3132
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
3133
    0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
3134
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
3135
    0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
3136
    0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
3137
    0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
3138
};
3139
3140
/*
 * ISO-8859-5: byte table sized as 48 index bytes plus 6 blocks of 64 bytes
 * (see the array bound).
 * NOTE(review): presumably passed as the xlattable argument of
 * UTF8ToISO8859x(), which reads entries 0..31 by the low 5 bits of a
 * 2-byte UTF-8 lead byte, entries 32..47 by the low 4 bits of a 3-byte
 * lead byte, and treats a final 0x00 as "not in character set" -- confirm
 * at the call site.
 */
static const unsigned char xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
3141
    "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3142
    "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3143
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3144
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3145
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3146
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3147
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3148
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3149
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3150
    "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
3151
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3152
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
3153
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3154
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3155
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3156
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3157
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
3158
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3159
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3160
    "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3161
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3162
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3163
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3164
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3165
    "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3166
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3167
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3168
};
3169
3170
/*
 * ISO-8859-6: table of 128 16-bit values, eight per row.
 * NOTE(review): presumably passed as the unicodetable argument of
 * ISO8859xToUTF8(), which indexes it with (byte - 0x80) and treats a
 * 0x0000 entry as an undefined byte -- confirm at the call site.
 */
static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
3171
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3172
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3173
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3174
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3175
    0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
3176
    0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
3177
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3178
    0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
3179
    0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
3180
    0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3181
    0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3182
    0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3183
    0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3184
    0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3185
    0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3186
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3187
};
3188
3189
/*
 * ISO-8859-6: byte table sized as 48 index bytes plus 5 blocks of 64 bytes
 * (see the array bound).
 * NOTE(review): presumably passed as the xlattable argument of
 * UTF8ToISO8859x(), which reads entries 0..31 by the low 5 bits of a
 * 2-byte UTF-8 lead byte, entries 32..47 by the low 4 bits of a 3-byte
 * lead byte, and treats a final 0x00 as "not in character set" -- confirm
 * at the call site.
 */
static const unsigned char xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
3190
    "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3191
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3192
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3193
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3194
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3195
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3196
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3197
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3198
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3199
    "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3200
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3201
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3202
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3203
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3204
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3205
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3206
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3207
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3208
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3209
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3210
    "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3211
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3212
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3213
};
3214
3215
/*
 * ISO-8859-7: table of 128 16-bit values, eight per row.
 * NOTE(review): presumably passed as the unicodetable argument of
 * ISO8859xToUTF8(), which indexes it with (byte - 0x80) and treats a
 * 0x0000 entry as an undefined byte -- confirm at the call site.
 */
static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
3216
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3217
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3218
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3219
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3220
    0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3221
    0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3222
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3223
    0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3224
    0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3225
    0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3226
    0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3227
    0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3228
    0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3229
    0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3230
    0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3231
    0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3232
};
3233
3234
/*
 * Auto-generated byte table for ISO-8859-7, declared as 48 + 7 * 64 bytes.
 * The concatenated string literals supply exactly that many bytes, so the
 * array holds no terminating NUL (allowed in C for an exactly-sized array).
 * NOTE(review): the declared size suggests a 48-byte index followed by 7
 * blocks of 64 bytes for the Unicode -> byte lookup done by UTF8ToISO8859x
 * -- confirm against that function, which is not visible in this part of
 * the file.
 */
static const unsigned char xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3235
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3236
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3237
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3238
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3239
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3240
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3241
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3242
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3243
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3244
    "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3245
    "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3246
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3247
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3248
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3249
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3250
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3251
    "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3252
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3253
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3254
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3255
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3256
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3257
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3258
    "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3259
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3260
    "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3261
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3262
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3263
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3264
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3265
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3266
};
3267
3268
/*
 * Auto-generated 128-entry table of 16-bit values for ISO-8859-8.
 * The first 32 entries are the identity values 0x0080..0x009f.
 * NOTE(review): presumably entry i is the Unicode code point for input byte
 * 0x80 + i, and 0x0000 marks a byte with no mapping -- confirm against
 * ISO8859xToUTF8, which is not visible in this part of the file.
 */
static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
3269
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3270
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3271
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3272
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3273
    0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3274
    0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3275
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3276
    0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3277
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3278
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3279
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3280
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3281
    0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3282
    0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3283
    0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3284
    0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3285
};
3286
3287
/*
 * Auto-generated byte table for ISO-8859-8, declared as 48 + 7 * 64 bytes.
 * The concatenated string literals supply exactly that many bytes, so the
 * array holds no terminating NUL (allowed in C for an exactly-sized array).
 * NOTE(review): the declared size suggests a 48-byte index followed by 7
 * blocks of 64 bytes for the Unicode -> byte lookup done by UTF8ToISO8859x
 * -- confirm against that function, which is not visible in this part of
 * the file.
 */
static const unsigned char xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3288
    "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3289
    "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3290
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3291
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3292
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3293
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3294
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3295
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3296
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3297
    "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3298
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3299
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3300
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3301
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3302
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3303
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3304
    "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3305
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3306
    "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3307
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3308
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3309
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3310
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3311
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3312
    "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3313
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3314
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3315
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3316
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3317
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3318
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3319
};
3320
3321
/*
 * Auto-generated 128-entry table of 16-bit values for ISO-8859-9.
 * The first 32 entries are the identity values 0x0080..0x009f; no entry in
 * this table is 0x0000.
 * NOTE(review): presumably entry i is the Unicode code point for input byte
 * 0x80 + i -- confirm against ISO8859xToUTF8, which is not visible in this
 * part of the file.
 */
static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
3322
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3323
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3324
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3325
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3326
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3327
    0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3328
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3329
    0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3330
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3331
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3332
    0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3333
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3334
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3335
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3336
    0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3337
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3338
};
3339
3340
/*
 * Auto-generated byte table for ISO-8859-9, declared as 48 + 5 * 64 bytes.
 * The concatenated string literals supply exactly that many bytes, so the
 * array holds no terminating NUL (allowed in C for an exactly-sized array).
 * NOTE(review): the declared size suggests a 48-byte index followed by 5
 * blocks of 64 bytes for the Unicode -> byte lookup done by UTF8ToISO8859x
 * -- confirm against that function, which is not visible in this part of
 * the file.
 */
static const unsigned char xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3341
    "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3342
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3343
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3344
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3345
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3346
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3347
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3348
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3349
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3350
    "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3351
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3352
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3353
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3354
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3355
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3356
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3357
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3358
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3359
    "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3360
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3361
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3362
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3363
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3364
};
3365
3366
/*
 * Auto-generated 128-entry table of 16-bit values for ISO-8859-10.
 * The first 32 entries are the identity values 0x0080..0x009f; no entry in
 * this table is 0x0000.
 * NOTE(review): presumably entry i is the Unicode code point for input byte
 * 0x80 + i -- confirm against ISO8859xToUTF8, which is not visible in this
 * part of the file.
 */
static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3367
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3368
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3369
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3370
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3371
    0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3372
    0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3373
    0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3374
    0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3375
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3376
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3377
    0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3378
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3379
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3380
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3381
    0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3382
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3383
};
3384
3385
/*
 * Auto-generated byte table for ISO-8859-10, declared as 48 + 7 * 64 bytes.
 * The concatenated string literals supply exactly that many bytes, so the
 * array holds no terminating NUL (allowed in C for an exactly-sized array).
 * NOTE(review): the declared size suggests a 48-byte index followed by 7
 * blocks of 64 bytes for the Unicode -> byte lookup done by UTF8ToISO8859x
 * -- confirm against that function, which is not visible in this part of
 * the file.
 */
static const unsigned char xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3386
    "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3387
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3388
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3389
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3390
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3391
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3392
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3393
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3394
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3395
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3396
    "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3397
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3398
    "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3399
    "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3400
    "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3401
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3402
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3403
    "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3404
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3405
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3406
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3407
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3408
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3409
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3410
    "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3411
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3412
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3413
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3414
    "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3415
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3416
    "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3417
};
3418
3419
/*
 * Auto-generated 128-entry table of 16-bit values for ISO-8859-11.
 * The first 32 entries are the identity values 0x0080..0x009f.
 * NOTE(review): presumably entry i is the Unicode code point for input byte
 * 0x80 + i, and 0x0000 marks a byte with no mapping -- confirm against
 * ISO8859xToUTF8, which is not visible in this part of the file.
 */
static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3420
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3421
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3422
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3423
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3424
    0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3425
    0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3426
    0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3427
    0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3428
    0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3429
    0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3430
    0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3431
    0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3432
    0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3433
    0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3434
    0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3435
    0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3436
};
3437
3438
/*
 * Auto-generated byte table for ISO-8859-11, declared as 48 + 6 * 64 bytes.
 * The concatenated string literals supply exactly that many bytes, so the
 * array holds no terminating NUL (allowed in C for an exactly-sized array).
 * NOTE(review): the declared size suggests a 48-byte index followed by 6
 * blocks of 64 bytes for the Unicode -> byte lookup done by UTF8ToISO8859x
 * -- confirm against that function, which is not visible in this part of
 * the file.
 */
static const unsigned char xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3439
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3440
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3441
    "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3442
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3443
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3444
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3445
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3446
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3447
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3448
    "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3449
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3450
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3451
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3452
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3453
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3454
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3455
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3456
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3457
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3458
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3459
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3460
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3461
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3462
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3463
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3464
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3465
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3466
};
3467
3468
/*
 * Auto-generated 128-entry table of 16-bit values for ISO-8859-13.
 * The first 32 entries are the identity values 0x0080..0x009f; no entry in
 * this table is 0x0000.
 * NOTE(review): presumably entry i is the Unicode code point for input byte
 * 0x80 + i -- confirm against ISO8859xToUTF8, which is not visible in this
 * part of the file.
 */
static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3469
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3470
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3471
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3472
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3473
    0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3474
    0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3475
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3476
    0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3477
    0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3478
    0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3479
    0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3480
    0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3481
    0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3482
    0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3483
    0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3484
    0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3485
};
3486
3487
/*
 * Auto-generated byte table for ISO-8859-13, declared as 48 + 7 * 64 bytes.
 * The concatenated string literals supply exactly that many bytes, so the
 * array holds no terminating NUL (allowed in C for an exactly-sized array).
 * NOTE(review): the declared size suggests a 48-byte index followed by 7
 * blocks of 64 bytes for the Unicode -> byte lookup done by UTF8ToISO8859x
 * -- confirm against that function, which is not visible in this part of
 * the file.
 */
static const unsigned char xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3488
    "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3489
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3490
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3491
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3492
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3493
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3494
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3495
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3496
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3497
    "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3498
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3499
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3500
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3501
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3502
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3503
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3504
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3505
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3506
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3507
    "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3508
    "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3509
    "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3510
    "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3511
    "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3512
    "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3513
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3514
    "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3515
    "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3516
    "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3517
    "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3518
    "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3519
};
3520
3521
/*
 * Auto-generated 128-entry table of 16-bit values for ISO-8859-14.
 * The first 32 entries are the identity values 0x0080..0x009f; no entry in
 * this table is 0x0000.
 * NOTE(review): presumably entry i is the Unicode code point for input byte
 * 0x80 + i -- confirm against ISO8859xToUTF8, which is not visible in this
 * part of the file.
 */
static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
3522
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3523
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3524
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3525
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3526
    0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3527
    0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3528
    0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3529
    0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3530
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3531
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3532
    0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3533
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3534
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3535
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3536
    0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3537
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
3538
};
3539
3540
/*
 * Auto-generated byte table for ISO-8859-14, declared as 48 + 10 * 64 bytes.
 * The concatenated string literals supply exactly that many bytes, so the
 * array holds no terminating NUL (allowed in C for an exactly-sized array).
 * NOTE(review): the declared size suggests a 48-byte index followed by 10
 * blocks of 64 bytes for the Unicode -> byte lookup done by UTF8ToISO8859x
 * -- confirm against that function, which is not visible in this part of
 * the file.
 */
static const unsigned char xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
3541
    "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3542
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3543
    "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3544
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3545
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3546
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3547
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3548
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3549
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3550
    "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3551
    "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3552
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3553
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3554
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3555
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3556
    "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3557
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3558
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3559
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3560
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3561
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3562
    "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3563
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3564
    "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3565
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3566
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3567
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3568
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3569
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3570
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3571
    "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3572
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3573
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3574
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3575
    "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3576
    "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3577
    "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3578
    "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3579
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3580
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3581
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3582
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3583
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3584
};
3585
3586
/*
 * Auto-generated 128-entry table of 16-bit values for ISO-8859-15.
 * The first 32 entries are the identity values 0x0080..0x009f; no entry in
 * this table is 0x0000.
 * NOTE(review): presumably entry i is the Unicode code point for input byte
 * 0x80 + i -- confirm against ISO8859xToUTF8, which is not visible in this
 * part of the file.
 */
static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
3587
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3588
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3589
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3590
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3591
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3592
    0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3593
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3594
    0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3595
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3596
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3597
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3598
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3599
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3600
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3601
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3602
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
3603
};
3604
3605
/*
 * Auto-generated byte table for ISO-8859-15, declared as 48 + 6 * 64 bytes.
 * The concatenated string literals supply exactly that many bytes, so the
 * array holds no terminating NUL (allowed in C for an exactly-sized array).
 * NOTE(review): the declared size suggests a 48-byte index followed by 6
 * blocks of 64 bytes for the Unicode -> byte lookup done by UTF8ToISO8859x
 * -- confirm against that function, which is not visible in this part of
 * the file.
 */
static const unsigned char xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
3606
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3607
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3608
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3609
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3610
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3611
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3612
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3613
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3614
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3615
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3616
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3617
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3618
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3619
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3620
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3621
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3622
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3623
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3624
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3625
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3626
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3627
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3628
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3629
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3630
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3631
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3632
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
3633
};
3634
3635
/*
 * Auto-generated 128-entry table of 16-bit values for ISO-8859-16.
 * The first 32 entries are the identity values 0x0080..0x009f; no entry in
 * this table is 0x0000.
 * NOTE(review): presumably entry i is the Unicode code point for input byte
 * 0x80 + i -- confirm against ISO8859xToUTF8, which is not visible in this
 * part of the file.
 */
static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
3636
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3637
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3638
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3639
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3640
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3641
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3642
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3643
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3644
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3645
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3646
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3647
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3648
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3649
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3650
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3651
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
3652
};
3653
3654
/*
 * Auto-generated byte table for ISO-8859-16, declared as 48 + 9 * 64 bytes.
 * The concatenated string literals supply exactly that many bytes, so the
 * array holds no terminating NUL (allowed in C for an exactly-sized array).
 * NOTE(review): the declared size suggests a 48-byte index followed by 9
 * blocks of 64 bytes for the Unicode -> byte lookup done by UTF8ToISO8859x
 * -- confirm against that function, which is not visible in this part of
 * the file.
 */
static const unsigned char xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
3655
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3656
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3657
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3658
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3659
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3660
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3661
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3662
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3663
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3664
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3665
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3666
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3667
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3668
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3669
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3670
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3671
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3672
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3673
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3674
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3675
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3676
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3677
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3678
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3679
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3680
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3681
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3682
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3683
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3684
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3685
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3686
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3687
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3688
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3689
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3690
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3691
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3692
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3693
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3694
};
3695
3696
3697
/*
3698
 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3699
 */
3700
3701
static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3702
    const unsigned char* in, int *inlen) {
3703
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3704
}
3705
static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3706
    const unsigned char* in, int *inlen) {
3707
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3708
}
3709
3710
static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3711
    const unsigned char* in, int *inlen) {
3712
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3713
}
3714
static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3715
    const unsigned char* in, int *inlen) {
3716
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3717
}
3718
3719
static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3720
    const unsigned char* in, int *inlen) {
3721
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3722
}
3723
static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3724
    const unsigned char* in, int *inlen) {
3725
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3726
}
3727
3728
static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3729
    const unsigned char* in, int *inlen) {
3730
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3731
}
3732
static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3733
    const unsigned char* in, int *inlen) {
3734
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3735
}
3736
3737
static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3738
    const unsigned char* in, int *inlen) {
3739
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3740
}
3741
static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3742
    const unsigned char* in, int *inlen) {
3743
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3744
}
3745
3746
static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3747
    const unsigned char* in, int *inlen) {
3748
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3749
}
3750
static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3751
    const unsigned char* in, int *inlen) {
3752
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3753
}
3754
3755
static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3756
    const unsigned char* in, int *inlen) {
3757
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3758
}
3759
static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3760
    const unsigned char* in, int *inlen) {
3761
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3762
}
3763
3764
static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3765
    const unsigned char* in, int *inlen) {
3766
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3767
}
3768
static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3769
    const unsigned char* in, int *inlen) {
3770
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3771
}
3772
3773
static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3774
    const unsigned char* in, int *inlen) {
3775
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3776
}
3777
static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3778
    const unsigned char* in, int *inlen) {
3779
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3780
}
3781
3782
static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3783
    const unsigned char* in, int *inlen) {
3784
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3785
}
3786
static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3787
    const unsigned char* in, int *inlen) {
3788
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3789
}
3790
3791
static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3792
    const unsigned char* in, int *inlen) {
3793
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3794
}
3795
static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3796
    const unsigned char* in, int *inlen) {
3797
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3798
}
3799
3800
static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3801
    const unsigned char* in, int *inlen) {
3802
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3803
}
3804
static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3805
    const unsigned char* in, int *inlen) {
3806
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3807
}
3808
3809
static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3810
    const unsigned char* in, int *inlen) {
3811
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3812
}
3813
static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3814
    const unsigned char* in, int *inlen) {
3815
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3816
}
3817
3818
static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3819
    const unsigned char* in, int *inlen) {
3820
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3821
}
3822
static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3823
    const unsigned char* in, int *inlen) {
3824
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3825
}
3826
3827
#endif
3828
#endif
3829