Coverage Report

Created: 2023-03-26 06:14

/src/libxml2/encoding.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * encoding.c : implements the encoding conversion functions needed for XML
3
 *
4
 * Related specs:
5
 * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6
 * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7
 * [ISO-10646]    UTF-8 and UTF-16 in Annexes
8
 * [ISO-8859-1]   ISO Latin-1 characters codes.
9
 * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
10
 *                Worldwide Character Encoding -- Version 1.0", Addison-
11
 *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
12
 *                described in Unicode Technical Report #4.
13
 * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
14
 *                Information Interchange, ANSI X3.4-1986.
15
 *
16
 * See Copyright for the status of this software.
17
 *
18
 * daniel@veillard.com
19
 *
20
 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
21
 */
22
23
#define IN_LIBXML
24
#include "libxml.h"
25
26
#include <string.h>
27
#include <limits.h>
28
#include <ctype.h>
29
#include <stdlib.h>
30
31
#ifdef LIBXML_ICONV_ENABLED
32
#include <errno.h>
33
#endif
34
35
#include <libxml/encoding.h>
36
#include <libxml/xmlmemory.h>
37
#ifdef LIBXML_HTML_ENABLED
38
#include <libxml/HTMLparser.h>
39
#endif
40
#include <libxml/globals.h>
41
#include <libxml/xmlerror.h>
42
43
#include "private/buf.h"
44
#include "private/enc.h"
45
#include "private/error.h"
46
#ifdef LIBXML_ICU_ENABLED
#include <unicode/ucnv.h>

/* Size of pivot buffer, same as icu/source/common/ucnv.cpp CHUNK_SIZE */
#define ICU_PIVOT_BUF_SIZE 1024

/*
 * Bundle of everything needed to convert between an arbitrary encoding
 * and UTF-8 through ICU. The pivot members hold the intermediate UTF-16
 * text; pivot_source and pivot_target are cursors into pivot_buf.
 */
typedef struct _uconv_t {
  UConverter *uconv; /* for conversion between an encoding and UTF-16 */
  UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */
  UChar      pivot_buf[ICU_PIVOT_BUF_SIZE];
  UChar      *pivot_source;
  UChar      *pivot_target;
} uconv_t;
#endif
60
/*
 * One entry of the encoding alias table: a pair of strings.
 * NOTE(review): presumably @alias is the alternate spelling and @name the
 * encoding it maps to -- confirm against the alias registration code.
 */
typedef struct _xmlCharEncodingAlias {
    const char *name;
    const char *alias;
} xmlCharEncodingAlias;
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;

/* Alias table storage; starts out empty. */
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
static int xmlCharEncodingAliasesNb = 0;
static int xmlCharEncodingAliasesMax = 0;

/*
 * Encoding traces are compiled out. To enable them in an iconv or ICU
 * build, change the "#if 0" below to "#if 1".
 */
#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
#if 0
#define DEBUG_ENCODING  /* Define this to get encoding traces */
#endif
#endif

/*
 * Non-zero when the host is treated as little-endian. The UTF-16
 * converters in this file consult it; it defaults to 1 here.
 */
static int xmlLittleEndian = 1;
80
81
/**
82
 * xmlEncodingErrMemory:
83
 * @extra:  extra information
84
 *
85
 * Handle an out of memory condition
86
 */
87
static void
88
xmlEncodingErrMemory(const char *extra)
89
0
{
90
0
    __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
91
0
}
92
93
/**
94
 * xmlErrEncoding:
95
 * @error:  the error number
96
 * @msg:  the error message
97
 *
98
 * n encoding error
99
 */
100
static void LIBXML_ATTR_FORMAT(2,0)
101
xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
102
42.5k
{
103
42.5k
    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
104
42.5k
                    XML_FROM_I18N, error, XML_ERR_FATAL,
105
42.5k
                    NULL, 0, val, NULL, NULL, 0, 0, msg, val);
106
42.5k
}
107
108
#ifdef LIBXML_ICU_ENABLED
109
static uconv_t*
110
openIcuConverter(const char* name, int toUnicode)
111
{
112
  UErrorCode status = U_ZERO_ERROR;
113
  uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
114
  if (conv == NULL)
115
    return NULL;
116
117
  conv->pivot_source = conv->pivot_buf;
118
  conv->pivot_target = conv->pivot_buf;
119
120
  conv->uconv = ucnv_open(name, &status);
121
  if (U_FAILURE(status))
122
    goto error;
123
124
  status = U_ZERO_ERROR;
125
  if (toUnicode) {
126
    ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
127
                        NULL, NULL, NULL, &status);
128
  }
129
  else {
130
    ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
131
                        NULL, NULL, NULL, &status);
132
  }
133
  if (U_FAILURE(status))
134
    goto error;
135
136
  status = U_ZERO_ERROR;
137
  conv->utf8 = ucnv_open("UTF-8", &status);
138
  if (U_SUCCESS(status))
139
    return conv;
140
141
error:
142
  if (conv->uconv)
143
    ucnv_close(conv->uconv);
144
  xmlFree(conv);
145
  return NULL;
146
}
147
148
static void
149
closeIcuConverter(uconv_t *conv)
150
{
151
  if (conv != NULL) {
152
    ucnv_close(conv->uconv);
153
    ucnv_close(conv->utf8);
154
    xmlFree(conv);
155
  }
156
}
157
#endif /* LIBXML_ICU_ENABLED */
158
159
/************************************************************************
160
 *                  *
161
 *    Conversions To/From UTF8 encoding     *
162
 *                  *
163
 ************************************************************************/
164
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out. Since ASCII is a subset of UTF-8 this is a
 * validating copy: bytes below 0x80 are copied, anything else is an error.
 *
 * Every input byte produces exactly one output byte, so the whole output
 * buffer is usable. Conversion stops when either the input is exhausted
 * or the output buffer is full.
 *
 * Returns the number of bytes written, or -1 if a byte >= 0x80 is met.
 * The value of @inlen after return is the number of octets consumed
 *     (on error: the octets before the offending byte).
 * The value of @outlen after return is the number of octets produced.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* base = in;
    unsigned char* outend = out + *outlen;
    const unsigned char* inend = in + (*inlen);

    while ((in < inend) && (out < outend)) {
        unsigned int c = *in;

        if (c >= 0x80) {
            /* not ASCII: report what was converted so far */
            *outlen = out - outstart;
            *inlen = in - base;
            return(-1);
        }
        *out++ = c;
        in++;
    }
    *outlen = out - outstart;
    *inlen = in - base;
    return(*outlen);
}
209
210
#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out.
 *
 * A NULL @in is an initialization call: both lengths are set to 0.
 * An input that ends in the middle of a multi-byte sequence is not an
 * error; the incomplete sequence is simply left unconsumed.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 * fails (malformed UTF-8 or a character outside ASCII), or -1 if @out,
 * @outlen or @inlen is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c= d; trailing= 0; }
        else if (d < 0xC0)  goto invalid; /* trailing byte in leading position */
        else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
        else                goto invalid; /* no chance for this in Ascii */

        /* sequence split across buffers: wait for more input */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /*
             * A lead byte must be followed by continuation bytes only.
             * Decoding a partial value here would emit a bogus character
             * and swallow the byte that broke the sequence.
             */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* c is now a single decoded code point */
        if (c >= 0x80)
            goto invalid;                 /* no chance for this in Ascii */
        if (out >= outend)
            break;
        *out++ = c;
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(-2);
}
294
#endif /* LIBXML_OUTPUT_ENABLED */
295
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied as is; bytes from 0x80
 * up become a two-byte UTF-8 sequence. Conversion stops at the first
 * character that no longer fits in the output buffer.
 *
 * Returns the number of bytes written if success, or -1 if any argument
 * is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* base = in;
    unsigned char* outend;
    const unsigned char* inend;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    outend = out + *outlen;
    inend = in + (*inlen);

    while (in < inend) {
        unsigned int c = *in;

        if (c < 0x80) {
            if (out >= outend)
                break;
            *out++ = c;
        } else {
            /*
             * Needs two bytes. Compare the remaining space instead of
             * forming "outend - 1", which is not a valid pointer when
             * the output buffer is empty.
             */
            if (outend - out < 2)
                break;
            *out++ = ((c >> 6) & 0x1F) | 0xC0;
            *out++ = (c & 0x3F) | 0x80;
        }
        in++;
    }
    *outlen = out - outstart;
    *inlen = in - base;
    return(*outlen);
}
344
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * No op copy operation for UTF8 handling: copies as many bytes as fit,
 * i.e. the smaller of *@outlen and *@inlenb. A NULL @inb is an
 * initialization call which only resets both lengths to 0.
 *
 * Returns the number of bytes written, or -1 if @out, @outlen or @inlenb
 *     is NULL or if the number of bytes to copy is negative.
 *     On success both *@outlen and *@inlenb are set to the number of
 *     bytes copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int count;

    if (!out || !outlen || !inlenb)
        return(-1);

    if (!inb) {
        /* inb == NULL means output is initialized. */
        *inlenb = 0;
        *outlen = 0;
        return(0);
    }

    count = (*inlenb < *outlen) ? *inlenb : *outlen;
    if (count < 0)
        return(-1);

    /*
     * FIXME: Conversion functions must assure valid UTF-8, so we have
     * to check for UTF-8 validity. Preferably, this converter shouldn't
     * be used at all.
     */
    memcpy(out, inb, count);

    *inlenb = count;
    *outlen = count;
    return(count);
}
391
392
393
#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out.
 *
 * A NULL @in is an initialization call: both lengths are set to 0.
 * Input ending in the middle of a multi-byte sequence, or a full output
 * buffer, stops the conversion without error.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 * fails (bad lead or continuation byte, or a character above 0xFF),
 * or -1 if @out, @outlen or @inlen is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char *src;
    const unsigned char *srcEnd;
    const unsigned char *committed;
    unsigned char *dst;
    const unsigned char *dstEnd;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    if (in == NULL) {
        /* initialization nothing to do */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    src = in;
    committed = in;
    srcEnd = in + (*inlen);
    dst = out;
    dstEnd = out + (*outlen);

    while (src < srcEnd) {
        unsigned int lead = *src++;
        unsigned int cp;
        int extra;

        if (lead < 0x80) {
            cp = lead;
            extra = 0;
        } else if (lead < 0xC0) {
            /* trailing byte in leading position */
            goto reject;
        } else if (lead < 0xE0) {
            cp = lead & 0x1F;
            extra = 1;
        } else if (lead < 0xF0) {
            cp = lead & 0x0F;
            extra = 2;
        } else if (lead < 0xF8) {
            cp = lead & 0x07;
            extra = 3;
        } else {
            /* no chance for this in IsoLat1 */
            goto reject;
        }

        /* incomplete sequence at the end of the input */
        if (srcEnd - src < extra)
            break;

        while (extra > 0) {
            unsigned int cont = *src++;

            if ((cont & 0xC0) != 0x80)
                goto reject;
            cp = (cp << 6) | (cont & 0x3F);
            extra--;
        }

        /* cp is now a single decoded code point */
        if (cp > 0xFF)
            goto reject;        /* no chance for this in IsoLat1 */
        if (dst >= dstEnd)
            break;
        *dst++ = cp;
        committed = src;
    }

    *outlen = dst - out;
    *inlen = committed - in;
    return(*outlen);

reject:
    *outlen = dst - out;
    *inlen = committed - in;
    return(-2);
}
483
#endif /* LIBXML_OUTPUT_ENABLED */
484
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an
 * UTF-8 block of chars out.
 *
 * The input is read one byte at a time and assembled low byte first,
 * so the result does not depend on the byte order of the host and @inb
 * needs no particular alignment.
 *
 * A trailing odd byte is ignored, and a high surrogate at the very end
 * of the input is left unconsumed so the pair can be completed by the
 * next call. The loop stops while a few bytes of @out are still free.
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 *     (a high surrogate not followed by a low surrogate).
 *     The value of *inlenb after return is the number of octets consumed.
 *     The value of *outlen after return is the number of octets produced.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* processed = inb;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int bits;

    if (*outlen == 0) {
        *inlenb = 0;
        return(0);
    }
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + *inlenb;

    /* the "+ 5" slack guarantees room for a full 4-byte sequence */
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
        c = in[0] | ((unsigned int) in[1] << 8);
        in += 2;

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend) {           /* handle split multi-byte characters */
                break;
            }
            d = in[0] | ((unsigned int) in[1] << 8);
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
            c &= 0x03FF;
            c <<= 10;
            c |= d & 0x03FF;
            c += 0x10000;
        }

        /* c is now a single code point: emit the lead byte ... */
        if      (c <    0x80) {  *out++=  c;                bits= -6; }
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }

        /* ... followed by the continuation bytes, 6 bits each */
        for ( ; bits >= 0; bits-= 6) {
            *out++= ((c >> bits) & 0x3F) | 0x80;
        }
        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(*outlen);
}
577
578
#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. No byte order mark is produced.
 *
 * The output is written one byte at a time, low byte first, so the
 * result does not depend on the byte order of the host and @outb needs
 * no particular alignment.
 *
 * A NULL @in is an initialization call: both lengths are set to 0.
 * Input ending in the middle of a multi-byte sequence, or an output
 * buffer without room for the next character, stops the conversion
 * without error.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 *     NULL, or -2 if the transcoding failed (bad lead or continuation
 *     byte, or a value above U+10FFFF which UTF-16 cannot represent).
 *     The value of *inlen after return is the number of octets consumed.
 *     The value of *outlen after return is the number of octets produced.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend= in + *inlen;
    /* only whole 16-bit units can be written */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d= *in++;
        if      (d < 0x80)  { c= d; trailing= 0; }
        else if (d < 0xC0)  goto invalid; /* trailing byte in leading position */
        else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
        else                goto invalid; /* no chance for this in UTF-16 */

        /* sequence split across buffers: wait for more input */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /* anything but a continuation byte makes the sequence malformed */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* c is now a single decoded code point */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = c & 0xFF;
            *out++ = c >> 8;
        }
        else if (c < 0x110000) {
            unsigned int hi, lo;

            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = hi & 0xFF;
            *out++ = hi >> 8;
            *out++ = lo & 0xFF;
            *out++ = lo >> 8;
        }
        else {
            /* beyond the UTF-16 range: retrying can never succeed */
            goto invalid;
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
687
/**
 * UTF8ToUTF16:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
 * block of chars out.
 *
 * A NULL @in is the initialization call: the two bytes 0xFF 0xFE (the
 * little-endian byte order mark) are written if @outb has room for
 * them, otherwise nothing is written. Any other call is handed to
 * UTF8ToUTF16LE() unchanged.
 *
 * Returns the number of bytes written, or -1 if lack of space, or -2
 *     if the transcoding failed.
 */
static int
UTF8ToUTF16(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    if (in != NULL)
        return (UTF8ToUTF16LE(outb, outlen, in, inlen));

    /* initialization: no room for the Byte Order Mark, emit nothing */
    if (*outlen < 2) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    /* initialization, add the Byte Order Mark for UTF-16LE */
    outb[0] = 0xFF;
    outb[1] = 0xFE;
    *outlen = 2;
    *inlen = 0;
#ifdef DEBUG_ENCODING
    xmlGenericError(xmlGenericErrorContext,
        "Added FFFE Byte Order Mark\n");
#endif
    return(2);
}
726
#endif /* LIBXML_OUTPUT_ENABLED */
727
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an
 * UTF-8 block of chars out.
 *
 * The input is read one byte at a time and assembled high byte first,
 * so the result does not depend on the byte order of the host and @inb
 * needs no particular alignment.
 *
 * A trailing odd byte is ignored, and a high surrogate at the very end
 * of the input is left unconsumed so the pair can be completed by the
 * next call. The loop stops while a few bytes of @out are still free.
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 *     (a high surrogate not followed by a low surrogate).
 * The value of *inlenb after return is the number of octets consumed.
 * The value of *outlen after return is the number of octets produced.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* processed = inb;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int bits;

    if (*outlen == 0) {
        *inlenb = 0;
        return(0);
    }
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + *inlenb;

    /* the "+ 5" slack guarantees room for a full 4-byte sequence */
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
        c = ((unsigned int) in[0] << 8) | in[1];
        in += 2;

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend) {           /* handle split multi-byte characters */
                break;
            }
            d = ((unsigned int) in[0] << 8) | in[1];
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
            c &= 0x03FF;
            c <<= 10;
            c |= d & 0x03FF;
            c += 0x10000;
        }

        /* c is now a single code point: emit the lead byte ... */
        if      (c <    0x80) {  *out++=  c;                bits= -6; }
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }

        /* ... followed by the continuation bytes, 6 bits each */
        for ( ; bits >= 0; bits-= 6) {
            *out++= ((c >> bits) & 0x3F) | 0x80;
        }
        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(*outlen);
}
820
821
#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. No byte order mark is produced.
 *
 * The output is written one byte at a time, high byte first, so the
 * result does not depend on the byte order of the host and @outb needs
 * no particular alignment.
 *
 * A NULL @in is an initialization call: both lengths are set to 0.
 * Input ending in the middle of a multi-byte sequence, or an output
 * buffer without room for the next character, stops the conversion
 * without error.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 *     NULL, or -2 if the transcoding failed (bad lead or continuation
 *     byte, or a value above U+10FFFF which UTF-16 cannot represent).
 *     The value of *inlen after return is the number of octets consumed.
 *     The value of *outlen after return is the number of octets produced;
 *     it is always counted in bytes, on the error paths as well.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend= in + *inlen;
    /* only whole 16-bit units can be written */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d= *in++;
        if      (d < 0x80)  { c= d; trailing= 0; }
        else if (d < 0xC0)  goto invalid; /* trailing byte in leading position */
        else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
        else                goto invalid; /* no chance for this in UTF-16 */

        /* sequence split across buffers: wait for more input */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /* anything but a continuation byte makes the sequence malformed */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* c is now a single decoded code point */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = c >> 8;
            *out++ = c & 0xFF;
        }
        else if (c < 0x110000) {
            unsigned int hi, lo;

            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = hi >> 8;
            *out++ = hi & 0xFF;
            *out++ = lo >> 8;
            *out++ = lo & 0xFF;
        }
        else {
            /* beyond the UTF-16 range: retrying can never succeed */
            goto invalid;
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
927
#endif /* LIBXML_OUTPUT_ENABLED */
928
929
/************************************************************************
930
 *                  *
931
 *    Generic encoding handling routines      *
932
 *                  *
933
 ************************************************************************/
934
935
/**
936
 * xmlDetectCharEncoding:
937
 * @in:  a pointer to the first bytes of the XML entity, must be at least
938
 *       2 bytes long (at least 4 if encoding is UTF4 variant).
939
 * @len:  pointer to the length of the buffer
940
 *
941
 * Guess the encoding of the entity using the first bytes of the entity content
942
 * according to the non-normative appendix F of the XML-1.0 recommendation.
943
 *
944
 * Returns one of the XML_CHAR_ENCODING_... values.
945
 */
946
xmlCharEncoding
947
xmlDetectCharEncoding(const unsigned char* in, int len)
948
210k
{
949
210k
    if (in == NULL)
950
0
        return(XML_CHAR_ENCODING_NONE);
951
210k
    if (len >= 4) {
952
210k
  if ((in[0] == 0x00) && (in[1] == 0x00) &&
953
210k
      (in[2] == 0x00) && (in[3] == 0x3C))
954
201
      return(XML_CHAR_ENCODING_UCS4BE);
955
210k
  if ((in[0] == 0x3C) && (in[1] == 0x00) &&
956
210k
      (in[2] == 0x00) && (in[3] == 0x00))
957
724
      return(XML_CHAR_ENCODING_UCS4LE);
958
209k
  if ((in[0] == 0x00) && (in[1] == 0x00) &&
959
209k
      (in[2] == 0x3C) && (in[3] == 0x00))
960
222
      return(XML_CHAR_ENCODING_UCS4_2143);
961
209k
  if ((in[0] == 0x00) && (in[1] == 0x3C) &&
962
209k
      (in[2] == 0x00) && (in[3] == 0x00))
963
354
      return(XML_CHAR_ENCODING_UCS4_3412);
964
208k
  if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
965
208k
      (in[2] == 0xA7) && (in[3] == 0x94))
966
722
      return(XML_CHAR_ENCODING_EBCDIC);
967
207k
  if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
968
207k
      (in[2] == 0x78) && (in[3] == 0x6D))
969
105k
      return(XML_CHAR_ENCODING_UTF8);
970
  /*
971
   * Although not part of the recommendation, we also
972
   * attempt an "auto-recognition" of UTF-16LE and
973
   * UTF-16BE encodings.
974
   */
975
102k
  if ((in[0] == 0x3C) && (in[1] == 0x00) &&
976
102k
      (in[2] == 0x3F) && (in[3] == 0x00))
977
1.30k
      return(XML_CHAR_ENCODING_UTF16LE);
978
101k
  if ((in[0] == 0x00) && (in[1] == 0x3C) &&
979
101k
      (in[2] == 0x00) && (in[3] == 0x3F))
980
467
      return(XML_CHAR_ENCODING_UTF16BE);
981
101k
    }
982
101k
    if (len >= 3) {
983
  /*
984
   * Errata on XML-1.0 June 20 2001
985
   * We now allow an UTF8 encoded BOM
986
   */
987
101k
  if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
988
101k
      (in[2] == 0xBF))
989
5.93k
      return(XML_CHAR_ENCODING_UTF8);
990
101k
    }
991
    /* For UTF-16 we can recognize by the BOM */
992
95.0k
    if (len >= 2) {
993
95.0k
  if ((in[0] == 0xFE) && (in[1] == 0xFF))
994
658
      return(XML_CHAR_ENCODING_UTF16BE);
995
94.4k
  if ((in[0] == 0xFF) && (in[1] == 0xFE))
996
75
      return(XML_CHAR_ENCODING_UTF16LE);
997
94.4k
    }
998
94.3k
    return(XML_CHAR_ENCODING_NONE);
999
95.0k
}
1000
1001
/**
1002
 * xmlCleanupEncodingAliases:
1003
 *
1004
 * Unregisters all aliases
1005
 */
1006
void
1007
0
xmlCleanupEncodingAliases(void) {
1008
0
    int i;
1009
1010
0
    if (xmlCharEncodingAliases == NULL)
1011
0
  return;
1012
1013
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1014
0
  if (xmlCharEncodingAliases[i].name != NULL)
1015
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1016
0
  if (xmlCharEncodingAliases[i].alias != NULL)
1017
0
      xmlFree((char *) xmlCharEncodingAliases[i].alias);
1018
0
    }
1019
0
    xmlCharEncodingAliasesNb = 0;
1020
0
    xmlCharEncodingAliasesMax = 0;
1021
0
    xmlFree(xmlCharEncodingAliases);
1022
0
    xmlCharEncodingAliases = NULL;
1023
0
}
1024
1025
/**
1026
 * xmlGetEncodingAlias:
1027
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1028
 *
1029
 * Lookup an encoding name for the given alias.
1030
 *
1031
 * Returns NULL if not found, otherwise the original name
1032
 */
1033
const char *
1034
77.0k
xmlGetEncodingAlias(const char *alias) {
1035
77.0k
    int i;
1036
77.0k
    char upper[100];
1037
1038
77.0k
    if (alias == NULL)
1039
0
  return(NULL);
1040
1041
77.0k
    if (xmlCharEncodingAliases == NULL)
1042
77.0k
  return(NULL);
1043
1044
0
    for (i = 0;i < 99;i++) {
1045
0
        upper[i] = (char) toupper((unsigned char) alias[i]);
1046
0
  if (upper[i] == 0) break;
1047
0
    }
1048
0
    upper[i] = 0;
1049
1050
    /*
1051
     * Walk down the list looking for a definition of the alias
1052
     */
1053
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1054
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1055
0
      return(xmlCharEncodingAliases[i].name);
1056
0
  }
1057
0
    }
1058
0
    return(NULL);
1059
0
}
1060
1061
/**
1062
 * xmlAddEncodingAlias:
1063
 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1064
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1065
 *
1066
 * Registers an alias @alias for an encoding named @name. Existing alias
1067
 * will be overwritten.
1068
 *
1069
 * Returns 0 in case of success, -1 in case of error
1070
 */
1071
int
1072
0
xmlAddEncodingAlias(const char *name, const char *alias) {
1073
0
    int i;
1074
0
    char upper[100];
1075
1076
0
    if ((name == NULL) || (alias == NULL))
1077
0
  return(-1);
1078
1079
0
    for (i = 0;i < 99;i++) {
1080
0
        upper[i] = (char) toupper((unsigned char) alias[i]);
1081
0
  if (upper[i] == 0) break;
1082
0
    }
1083
0
    upper[i] = 0;
1084
1085
0
    if (xmlCharEncodingAliases == NULL) {
1086
0
  xmlCharEncodingAliasesNb = 0;
1087
0
  xmlCharEncodingAliasesMax = 20;
1088
0
  xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1089
0
        xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1090
0
  if (xmlCharEncodingAliases == NULL)
1091
0
      return(-1);
1092
0
    } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1093
0
  xmlCharEncodingAliasesMax *= 2;
1094
0
  xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1095
0
        xmlRealloc(xmlCharEncodingAliases,
1096
0
             xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1097
0
    }
1098
    /*
1099
     * Walk down the list looking for a definition of the alias
1100
     */
1101
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1102
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1103
      /*
1104
       * Replace the definition.
1105
       */
1106
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1107
0
      xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1108
0
      return(0);
1109
0
  }
1110
0
    }
1111
    /*
1112
     * Add the definition
1113
     */
1114
0
    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1115
0
    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1116
0
    xmlCharEncodingAliasesNb++;
1117
0
    return(0);
1118
0
}
1119
1120
/**
1121
 * xmlDelEncodingAlias:
1122
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1123
 *
1124
 * Unregisters an encoding alias @alias
1125
 *
1126
 * Returns 0 in case of success, -1 in case of error
1127
 */
1128
int
1129
0
xmlDelEncodingAlias(const char *alias) {
1130
0
    int i;
1131
1132
0
    if (alias == NULL)
1133
0
  return(-1);
1134
1135
0
    if (xmlCharEncodingAliases == NULL)
1136
0
  return(-1);
1137
    /*
1138
     * Walk down the list looking for a definition of the alias
1139
     */
1140
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1141
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1142
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1143
0
      xmlFree((char *) xmlCharEncodingAliases[i].alias);
1144
0
      xmlCharEncodingAliasesNb--;
1145
0
      memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1146
0
        sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1147
0
      return(0);
1148
0
  }
1149
0
    }
1150
0
    return(-1);
1151
0
}
1152
1153
/**
1154
 * xmlParseCharEncoding:
1155
 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1156
 *
1157
 * Compare the string to the encoding schemes already known. Note
1158
 * that the comparison is case insensitive accordingly to the section
1159
 * [XML] 4.3.3 Character Encoding in Entities.
1160
 *
1161
 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1162
 * if not recognized.
1163
 */
1164
xmlCharEncoding
1165
xmlParseCharEncoding(const char* name)
1166
4.45k
{
1167
4.45k
    const char *alias;
1168
4.45k
    char upper[500];
1169
4.45k
    int i;
1170
1171
4.45k
    if (name == NULL)
1172
0
  return(XML_CHAR_ENCODING_NONE);
1173
1174
    /*
1175
     * Do the alias resolution
1176
     */
1177
4.45k
    alias = xmlGetEncodingAlias(name);
1178
4.45k
    if (alias != NULL)
1179
0
  name = alias;
1180
1181
120k
    for (i = 0;i < 499;i++) {
1182
120k
        upper[i] = (char) toupper((unsigned char) name[i]);
1183
120k
  if (upper[i] == 0) break;
1184
120k
    }
1185
4.45k
    upper[i] = 0;
1186
1187
4.45k
    if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1188
4.45k
    if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1189
4.45k
    if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1190
1191
    /*
1192
     * NOTE: if we were able to parse this, the endianness of UTF16 is
1193
     *       already found and in use
1194
     */
1195
4.45k
    if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1196
4.45k
    if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1197
1198
4.45k
    if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1199
4.09k
    if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1200
4.09k
    if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1201
1202
    /*
1203
     * NOTE: if we were able to parse this, the endianness of UCS4 is
1204
     *       already found and in use
1205
     */
1206
4.09k
    if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1207
3.16k
    if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1208
3.16k
    if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1209
1210
1211
3.16k
    if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1212
3.16k
    if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1213
2.81k
    if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1214
1215
2.81k
    if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1216
2.81k
    if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1217
2.51k
    if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1218
1219
2.51k
    if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1220
2.51k
    if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1221
2.51k
    if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1222
2.51k
    if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1223
2.51k
    if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1224
2.51k
    if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1225
2.51k
    if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1226
1227
2.51k
    if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1228
2.51k
    if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1229
2.51k
    if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1230
1231
#ifdef DEBUG_ENCODING
1232
    xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1233
#endif
1234
2.51k
    return(XML_CHAR_ENCODING_ERROR);
1235
2.51k
}
1236
1237
/**
1238
 * xmlGetCharEncodingName:
1239
 * @enc:  the encoding
1240
 *
1241
 * The "canonical" name for XML encoding.
1242
 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1243
 * Section 4.3.3  Character Encoding in Entities
1244
 *
1245
 * Returns the canonical name for the given encoding
1246
 */
1247
1248
const char*
1249
2.52k
xmlGetCharEncodingName(xmlCharEncoding enc) {
1250
2.52k
    switch (enc) {
1251
0
        case XML_CHAR_ENCODING_ERROR:
1252
0
      return(NULL);
1253
0
        case XML_CHAR_ENCODING_NONE:
1254
0
      return(NULL);
1255
0
        case XML_CHAR_ENCODING_UTF8:
1256
0
      return("UTF-8");
1257
0
        case XML_CHAR_ENCODING_UTF16LE:
1258
0
      return("UTF-16");
1259
0
        case XML_CHAR_ENCODING_UTF16BE:
1260
0
      return("UTF-16");
1261
9
        case XML_CHAR_ENCODING_EBCDIC:
1262
9
            return("EBCDIC");
1263
928
        case XML_CHAR_ENCODING_UCS4LE:
1264
928
            return("ISO-10646-UCS-4");
1265
1
        case XML_CHAR_ENCODING_UCS4BE:
1266
1
            return("ISO-10646-UCS-4");
1267
222
        case XML_CHAR_ENCODING_UCS4_2143:
1268
222
            return("ISO-10646-UCS-4");
1269
354
        case XML_CHAR_ENCODING_UCS4_3412:
1270
354
            return("ISO-10646-UCS-4");
1271
360
        case XML_CHAR_ENCODING_UCS2:
1272
360
            return("ISO-10646-UCS-2");
1273
355
        case XML_CHAR_ENCODING_8859_1:
1274
355
      return("ISO-8859-1");
1275
295
        case XML_CHAR_ENCODING_8859_2:
1276
295
      return("ISO-8859-2");
1277
0
        case XML_CHAR_ENCODING_8859_3:
1278
0
      return("ISO-8859-3");
1279
0
        case XML_CHAR_ENCODING_8859_4:
1280
0
      return("ISO-8859-4");
1281
0
        case XML_CHAR_ENCODING_8859_5:
1282
0
      return("ISO-8859-5");
1283
0
        case XML_CHAR_ENCODING_8859_6:
1284
0
      return("ISO-8859-6");
1285
0
        case XML_CHAR_ENCODING_8859_7:
1286
0
      return("ISO-8859-7");
1287
0
        case XML_CHAR_ENCODING_8859_8:
1288
0
      return("ISO-8859-8");
1289
0
        case XML_CHAR_ENCODING_8859_9:
1290
0
      return("ISO-8859-9");
1291
0
        case XML_CHAR_ENCODING_2022_JP:
1292
0
            return("ISO-2022-JP");
1293
0
        case XML_CHAR_ENCODING_SHIFT_JIS:
1294
0
            return("Shift-JIS");
1295
0
        case XML_CHAR_ENCODING_EUC_JP:
1296
0
            return("EUC-JP");
1297
0
  case XML_CHAR_ENCODING_ASCII:
1298
0
      return(NULL);
1299
2.52k
    }
1300
0
    return(NULL);
1301
2.52k
}
1302
1303
/************************************************************************
1304
 *                  *
1305
 *      Char encoding handlers        *
1306
 *                  *
1307
 ************************************************************************/
1308
1309
/*
 * Forward declarations of the built-in ISO-8859-x converters. They are
 * only compiled when neither iconv nor ICU is enabled and
 * LIBXML_ISO8859X_ENABLED is defined.
 */
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) && \
1310
    defined(LIBXML_ISO8859X_ENABLED)
1311
1312
/*
 * DECLARE_ISO_FUNCS(n) declares the static converter pair for part n:
 * ISO8859_<n>ToUTF8() and UTF8ToISO8859_<n>(), both taking
 * (out, outlen, in, inlen).
 */
#define DECLARE_ISO_FUNCS(n) \
1313
    static int ISO8859_##n##ToUTF8(unsigned char* out, int *outlen, \
1314
                                   const unsigned char* in, int *inlen); \
1315
    static int UTF8ToISO8859_##n(unsigned char* out, int *outlen, \
1316
                                 const unsigned char* in, int *inlen);
1317
1318
/* Parts 2-11 and 13-16 are declared here; 1 and 12 are not in this list. */
/** DOC_DISABLE */
1319
DECLARE_ISO_FUNCS(2)
1320
DECLARE_ISO_FUNCS(3)
1321
DECLARE_ISO_FUNCS(4)
1322
DECLARE_ISO_FUNCS(5)
1323
DECLARE_ISO_FUNCS(6)
1324
DECLARE_ISO_FUNCS(7)
1325
DECLARE_ISO_FUNCS(8)
1326
DECLARE_ISO_FUNCS(9)
1327
DECLARE_ISO_FUNCS(10)
1328
DECLARE_ISO_FUNCS(11)
1329
DECLARE_ISO_FUNCS(13)
1330
DECLARE_ISO_FUNCS(14)
1331
DECLARE_ISO_FUNCS(15)
1332
DECLARE_ISO_FUNCS(16)
1333
/** DOC_ENABLE */
1334
1335
#endif /* LIBXML_ISO8859X_ENABLED */
1336
1337
/*
 * Trailing initializers for the optional converter members of
 * xmlCharEncodingHandler. Each macro expands to nothing when the
 * corresponding backend is disabled, so MAKE_HANDLER always supplies
 * exactly the members that exist in the current configuration.
 */
#ifdef LIBXML_ICONV_ENABLED
  #define EMPTY_ICONV , (iconv_t) 0, (iconv_t) 0
#else
  #define EMPTY_ICONV
#endif

/*
 * The ICU members (uconv_in / uconv_out) are guarded by
 * LIBXML_ICU_ENABLED everywhere else in this file, so the initializer
 * must use the same switch.
 * NOTE(review): assumes the struct declares the iconv pair before the
 * ICU pair, as the expansion order in MAKE_HANDLER implies - confirm
 * against the struct definition.
 */
#ifdef LIBXML_ICU_ENABLED
  #define EMPTY_UCONV , NULL, NULL
#else
  #define EMPTY_UCONV
#endif

/*
 * Brace initializer for one xmlCharEncodingHandler: name, input function,
 * output function, followed by the backend-specific empty members.
 */
#define MAKE_HANDLER(name, in, out) \
    { (char *) name, in, out EMPTY_ICONV EMPTY_UCONV }
1351
1352
/*
 * Table of the handlers that are always available without registration.
 * The order of the first entries matters: indices 1 and 2 (UTF-16LE and
 * UTF-16BE) are referenced directly by xmlUTF16LEHandler and
 * xmlUTF16BEHandler. Without LIBXML_OUTPUT_ENABLED the same names are
 * present but their output function is NULL.
 */
static const xmlCharEncodingHandler defaultHandlers[] = {
1353
    MAKE_HANDLER("UTF-8", UTF8ToUTF8, UTF8ToUTF8)
1354
#ifdef LIBXML_OUTPUT_ENABLED
1355
    ,MAKE_HANDLER("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE)
1356
    ,MAKE_HANDLER("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE)
1357
    ,MAKE_HANDLER("UTF-16", UTF16LEToUTF8, UTF8ToUTF16)
1358
    ,MAKE_HANDLER("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1)
1359
    ,MAKE_HANDLER("ASCII", asciiToUTF8, UTF8Toascii)
1360
    ,MAKE_HANDLER("US-ASCII", asciiToUTF8, UTF8Toascii)
1361
#ifdef LIBXML_HTML_ENABLED
1362
    /* Output-only handler: there is no input function for "HTML". */
    ,MAKE_HANDLER("HTML", NULL, UTF8ToHtml)
1363
#endif
1364
#else
1365
    ,MAKE_HANDLER("UTF-16LE", UTF16LEToUTF8, NULL)
1366
    ,MAKE_HANDLER("UTF-16BE", UTF16BEToUTF8, NULL)
1367
    ,MAKE_HANDLER("UTF-16", UTF16LEToUTF8, NULL)
1368
    ,MAKE_HANDLER("ISO-8859-1", isolat1ToUTF8, NULL)
1369
    ,MAKE_HANDLER("ASCII", asciiToUTF8, NULL)
1370
    ,MAKE_HANDLER("US-ASCII", asciiToUTF8, NULL)
1371
#endif /* LIBXML_OUTPUT_ENABLED */
1372
1373
/* Built-in ISO-8859-x tables, only when neither iconv nor ICU is used. */
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) && \
1374
    defined(LIBXML_ISO8859X_ENABLED)
1375
    ,MAKE_HANDLER("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2)
1376
    ,MAKE_HANDLER("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3)
1377
    ,MAKE_HANDLER("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4)
1378
    ,MAKE_HANDLER("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5)
1379
    ,MAKE_HANDLER("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6)
1380
    ,MAKE_HANDLER("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7)
1381
    ,MAKE_HANDLER("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8)
1382
    ,MAKE_HANDLER("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9)
1383
    ,MAKE_HANDLER("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10)
1384
    ,MAKE_HANDLER("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11)
1385
    ,MAKE_HANDLER("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13)
1386
    ,MAKE_HANDLER("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14)
1387
    ,MAKE_HANDLER("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15)
1388
    ,MAKE_HANDLER("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16)
1389
#endif
1390
};
1391
1392
/* Number of entries in defaultHandlers, computed at compile time. */
#define NUM_DEFAULT_HANDLERS \
1393
1.11M
    (sizeof(defaultHandlers) / sizeof(defaultHandlers[0]))
1394
1395
/* Shortcuts to the UTF-16LE / UTF-16BE entries of defaultHandlers. */
static const xmlCharEncodingHandler *xmlUTF16LEHandler = &defaultHandlers[1];
1396
static const xmlCharEncodingHandler *xmlUTF16BEHandler = &defaultHandlers[2];
1397
1398
/* the size should be growable, but it's not a big deal ... */
1399
0
#define MAX_ENCODING_HANDLERS 50
1400
/*
 * Table of handlers added through xmlRegisterCharEncodingHandler (NULL
 * until the first registration) and the number of slots in use.
 */
static xmlCharEncodingHandlerPtr *handlers = NULL;
1401
static int nbCharEncodingHandler = 0;
1402
1403
/**
1404
 * xmlNewCharEncodingHandler:
1405
 * @name:  the encoding name, in UTF-8 format (ASCII actually)
1406
 * @input:  the xmlCharEncodingInputFunc to read that encoding
1407
 * @output:  the xmlCharEncodingOutputFunc to write that encoding
1408
 *
1409
 * Create and registers an xmlCharEncodingHandler.
1410
 *
1411
 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1412
 */
1413
xmlCharEncodingHandlerPtr
1414
xmlNewCharEncodingHandler(const char *name,
1415
                          xmlCharEncodingInputFunc input,
1416
0
                          xmlCharEncodingOutputFunc output) {
1417
0
    xmlCharEncodingHandlerPtr handler;
1418
0
    const char *alias;
1419
0
    char upper[500];
1420
0
    int i;
1421
0
    char *up = NULL;
1422
1423
    /*
1424
     * Do the alias resolution
1425
     */
1426
0
    alias = xmlGetEncodingAlias(name);
1427
0
    if (alias != NULL)
1428
0
  name = alias;
1429
1430
    /*
1431
     * Keep only the uppercase version of the encoding.
1432
     */
1433
0
    if (name == NULL) {
1434
0
        xmlEncodingErr(XML_I18N_NO_NAME,
1435
0
           "xmlNewCharEncodingHandler : no name !\n", NULL);
1436
0
  return(NULL);
1437
0
    }
1438
0
    for (i = 0;i < 499;i++) {
1439
0
        upper[i] = (char) toupper((unsigned char) name[i]);
1440
0
  if (upper[i] == 0) break;
1441
0
    }
1442
0
    upper[i] = 0;
1443
0
    up = xmlMemStrdup(upper);
1444
0
    if (up == NULL) {
1445
0
        xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1446
0
  return(NULL);
1447
0
    }
1448
1449
    /*
1450
     * allocate and fill-up an handler block.
1451
     */
1452
0
    handler = (xmlCharEncodingHandlerPtr)
1453
0
              xmlMalloc(sizeof(xmlCharEncodingHandler));
1454
0
    if (handler == NULL) {
1455
0
        xmlFree(up);
1456
0
        xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1457
0
  return(NULL);
1458
0
    }
1459
0
    memset(handler, 0, sizeof(xmlCharEncodingHandler));
1460
0
    handler->input = input;
1461
0
    handler->output = output;
1462
0
    handler->name = up;
1463
1464
0
#ifdef LIBXML_ICONV_ENABLED
1465
0
    handler->iconv_in = NULL;
1466
0
    handler->iconv_out = NULL;
1467
0
#endif
1468
#ifdef LIBXML_ICU_ENABLED
1469
    handler->uconv_in = NULL;
1470
    handler->uconv_out = NULL;
1471
#endif
1472
1473
    /*
1474
     * registers and returns the handler.
1475
     */
1476
0
    xmlRegisterCharEncodingHandler(handler);
1477
#ifdef DEBUG_ENCODING
1478
    xmlGenericError(xmlGenericErrorContext,
1479
      "Registered encoding handler for %s\n", name);
1480
#endif
1481
0
    return(handler);
1482
0
}
1483
1484
/**
1485
 * xmlInitCharEncodingHandlers:
1486
 *
1487
 * DEPRECATED: Alias for xmlInitParser.
 *
 * Takes no arguments and returns nothing; the body only forwards to
 * xmlInitParser().
1488
 */
1489
void
1490
0
xmlInitCharEncodingHandlers(void) {
1491
0
    xmlInitParser();
1492
0
}
1493
1494
/**
1495
 * xmlInitEncodingInternal:
1496
 *
1497
 * Initialize the char encoding support.
1498
 */
1499
void
1500
4
xmlInitEncodingInternal(void) {
1501
4
    unsigned short int tst = 0x1234;
1502
4
    unsigned char *ptr = (unsigned char *) &tst;
1503
1504
4
    if (*ptr == 0x12) xmlLittleEndian = 0;
1505
4
    else if (*ptr == 0x34) xmlLittleEndian = 1;
1506
0
    else {
1507
0
        xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1508
0
                 "Odd problem at endianness detection\n", NULL);
1509
0
    }
1510
4
}
1511
1512
/**
1513
 * xmlCleanupCharEncodingHandlers:
1514
 *
1515
 * DEPRECATED: This function will be made private. Call xmlCleanupParser
1516
 * to free global state but see the warnings there. xmlCleanupParser
1517
 * should be only called once at program exit. In most cases, you don't
1518
 * have call cleanup functions at all.
1519
 *
1520
 * Cleanup the memory allocated for the char encoding support, it
1521
 * unregisters all the encoding handlers and the aliases.
1522
 */
1523
void
1524
0
xmlCleanupCharEncodingHandlers(void) {
1525
0
    xmlCleanupEncodingAliases();
1526
1527
0
    if (handlers == NULL) return;
1528
1529
0
    for (;nbCharEncodingHandler > 0;) {
1530
0
        nbCharEncodingHandler--;
1531
0
  if (handlers[nbCharEncodingHandler] != NULL) {
1532
0
      if (handlers[nbCharEncodingHandler]->name != NULL)
1533
0
    xmlFree(handlers[nbCharEncodingHandler]->name);
1534
0
      xmlFree(handlers[nbCharEncodingHandler]);
1535
0
  }
1536
0
    }
1537
0
    xmlFree(handlers);
1538
0
    handlers = NULL;
1539
0
    nbCharEncodingHandler = 0;
1540
0
}
1541
1542
/**
1543
 * xmlRegisterCharEncodingHandler:
1544
 * @handler:  the xmlCharEncodingHandlerPtr handler block
1545
 *
1546
 * Register the char encoding handler, surprising, isn't it ?
1547
 */
1548
void
1549
0
xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1550
0
    if (handler == NULL) {
1551
0
        xmlEncodingErr(XML_I18N_NO_HANDLER,
1552
0
    "xmlRegisterCharEncodingHandler: NULL handler\n", NULL);
1553
0
        return;
1554
0
    }
1555
0
    if (handlers == NULL) {
1556
0
        handlers = xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(handlers[0]));
1557
0
        if (handlers == NULL) {
1558
0
            xmlEncodingErrMemory("allocating handler table");
1559
0
            goto free_handler;
1560
0
        }
1561
0
    }
1562
1563
0
    if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1564
0
        xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1565
0
  "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1566
0
                 "MAX_ENCODING_HANDLERS");
1567
0
        goto free_handler;
1568
0
    }
1569
0
    handlers[nbCharEncodingHandler++] = handler;
1570
0
    return;
1571
1572
0
free_handler:
1573
0
    if (handler != NULL) {
1574
0
        if (handler->name != NULL) {
1575
0
            xmlFree(handler->name);
1576
0
        }
1577
0
        xmlFree(handler);
1578
0
    }
1579
0
}
1580
1581
/**
1582
 * xmlGetCharEncodingHandler:
1583
 * @enc:  an xmlCharEncoding value.
1584
 *
1585
 * Search in the registered set the handler able to read/write that encoding.
1586
 *
1587
 * Returns the handler or NULL if not found
1588
 */
1589
xmlCharEncodingHandlerPtr
1590
220k
xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1591
220k
    xmlCharEncodingHandlerPtr handler;
1592
1593
220k
    switch (enc) {
1594
0
        case XML_CHAR_ENCODING_ERROR:
1595
0
      return(NULL);
1596
215k
        case XML_CHAR_ENCODING_NONE:
1597
215k
      return(NULL);
1598
0
        case XML_CHAR_ENCODING_UTF8:
1599
0
      return(NULL);
1600
1.38k
        case XML_CHAR_ENCODING_UTF16LE:
1601
1.38k
      return((xmlCharEncodingHandlerPtr) xmlUTF16LEHandler);
1602
1.12k
        case XML_CHAR_ENCODING_UTF16BE:
1603
1.12k
      return((xmlCharEncodingHandlerPtr) xmlUTF16BEHandler);
1604
722
        case XML_CHAR_ENCODING_EBCDIC:
1605
722
            handler = xmlFindCharEncodingHandler("EBCDIC");
1606
722
            if (handler != NULL) return(handler);
1607
722
            handler = xmlFindCharEncodingHandler("ebcdic");
1608
722
            if (handler != NULL) return(handler);
1609
722
            handler = xmlFindCharEncodingHandler("EBCDIC-US");
1610
722
            if (handler != NULL) return(handler);
1611
1
            handler = xmlFindCharEncodingHandler("IBM-037");
1612
1
            if (handler != NULL) return(handler);
1613
1
      break;
1614
201
        case XML_CHAR_ENCODING_UCS4BE:
1615
201
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1616
201
            if (handler != NULL) return(handler);
1617
201
            handler = xmlFindCharEncodingHandler("UCS-4");
1618
201
            if (handler != NULL) return(handler);
1619
1
            handler = xmlFindCharEncodingHandler("UCS4");
1620
1
            if (handler != NULL) return(handler);
1621
1
      break;
1622
724
        case XML_CHAR_ENCODING_UCS4LE:
1623
724
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1624
724
            if (handler != NULL) return(handler);
1625
724
            handler = xmlFindCharEncodingHandler("UCS-4");
1626
724
            if (handler != NULL) return(handler);
1627
1
            handler = xmlFindCharEncodingHandler("UCS4");
1628
1
            if (handler != NULL) return(handler);
1629
1
      break;
1630
222
        case XML_CHAR_ENCODING_UCS4_2143:
1631
222
      break;
1632
354
        case XML_CHAR_ENCODING_UCS4_3412:
1633
354
      break;
1634
0
        case XML_CHAR_ENCODING_UCS2:
1635
0
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1636
0
            if (handler != NULL) return(handler);
1637
0
            handler = xmlFindCharEncodingHandler("UCS-2");
1638
0
            if (handler != NULL) return(handler);
1639
0
            handler = xmlFindCharEncodingHandler("UCS2");
1640
0
            if (handler != NULL) return(handler);
1641
0
      break;
1642
1643
      /*
1644
       * We used to keep ISO Latin encodings native in the
1645
       * generated data. This led to so many problems that
1646
       * this has been removed. One can still change this
1647
       * back by registering no-ops encoders for those
1648
       */
1649
0
        case XML_CHAR_ENCODING_8859_1:
1650
0
      handler = xmlFindCharEncodingHandler("ISO-8859-1");
1651
0
      if (handler != NULL) return(handler);
1652
0
      break;
1653
0
        case XML_CHAR_ENCODING_8859_2:
1654
0
      handler = xmlFindCharEncodingHandler("ISO-8859-2");
1655
0
      if (handler != NULL) return(handler);
1656
0
      break;
1657
0
        case XML_CHAR_ENCODING_8859_3:
1658
0
      handler = xmlFindCharEncodingHandler("ISO-8859-3");
1659
0
      if (handler != NULL) return(handler);
1660
0
      break;
1661
0
        case XML_CHAR_ENCODING_8859_4:
1662
0
      handler = xmlFindCharEncodingHandler("ISO-8859-4");
1663
0
      if (handler != NULL) return(handler);
1664
0
      break;
1665
0
        case XML_CHAR_ENCODING_8859_5:
1666
0
      handler = xmlFindCharEncodingHandler("ISO-8859-5");
1667
0
      if (handler != NULL) return(handler);
1668
0
      break;
1669
0
        case XML_CHAR_ENCODING_8859_6:
1670
0
      handler = xmlFindCharEncodingHandler("ISO-8859-6");
1671
0
      if (handler != NULL) return(handler);
1672
0
      break;
1673
0
        case XML_CHAR_ENCODING_8859_7:
1674
0
      handler = xmlFindCharEncodingHandler("ISO-8859-7");
1675
0
      if (handler != NULL) return(handler);
1676
0
      break;
1677
0
        case XML_CHAR_ENCODING_8859_8:
1678
0
      handler = xmlFindCharEncodingHandler("ISO-8859-8");
1679
0
      if (handler != NULL) return(handler);
1680
0
      break;
1681
0
        case XML_CHAR_ENCODING_8859_9:
1682
0
      handler = xmlFindCharEncodingHandler("ISO-8859-9");
1683
0
      if (handler != NULL) return(handler);
1684
0
      break;
1685
1686
1687
0
        case XML_CHAR_ENCODING_2022_JP:
1688
0
            handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1689
0
            if (handler != NULL) return(handler);
1690
0
      break;
1691
0
        case XML_CHAR_ENCODING_SHIFT_JIS:
1692
0
            handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1693
0
            if (handler != NULL) return(handler);
1694
0
            handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1695
0
            if (handler != NULL) return(handler);
1696
0
            handler = xmlFindCharEncodingHandler("Shift_JIS");
1697
0
            if (handler != NULL) return(handler);
1698
0
      break;
1699
0
        case XML_CHAR_ENCODING_EUC_JP:
1700
0
            handler = xmlFindCharEncodingHandler("EUC-JP");
1701
0
            if (handler != NULL) return(handler);
1702
0
      break;
1703
0
  default:
1704
0
      break;
1705
220k
    }
1706
1707
#ifdef DEBUG_ENCODING
1708
    xmlGenericError(xmlGenericErrorContext,
1709
      "No handler found for encoding %d\n", enc);
1710
#endif
1711
579
    return(NULL);
1712
220k
}
1713
1714
/**
1715
 * xmlFindCharEncodingHandler:
1716
 * @name:  a string describing the char encoding.
1717
 *
1718
 * Search in the registered set the handler able to read/write that encoding
1719
 * or create a new one.
1720
 *
1721
 * Returns the handler or NULL if not found
1722
 */
1723
xmlCharEncodingHandlerPtr
1724
72.5k
xmlFindCharEncodingHandler(const char *name) {
1725
72.5k
    const char *nalias;
1726
72.5k
    const char *norig;
1727
72.5k
    xmlCharEncoding alias;
1728
72.5k
#ifdef LIBXML_ICONV_ENABLED
1729
72.5k
    xmlCharEncodingHandlerPtr enc;
1730
72.5k
    iconv_t icv_in, icv_out;
1731
72.5k
#endif /* LIBXML_ICONV_ENABLED */
1732
#ifdef LIBXML_ICU_ENABLED
1733
    xmlCharEncodingHandlerPtr encu;
1734
    uconv_t *ucv_in, *ucv_out;
1735
#endif /* LIBXML_ICU_ENABLED */
1736
72.5k
    char upper[100];
1737
72.5k
    int i;
1738
1739
72.5k
    if (name == NULL) return(NULL);
1740
72.5k
    if (name[0] == 0) return(NULL);
1741
1742
    /*
1743
     * Do the alias resolution
1744
     */
1745
72.5k
    norig = name;
1746
72.5k
    nalias = xmlGetEncodingAlias(name);
1747
72.5k
    if (nalias != NULL)
1748
0
  name = nalias;
1749
1750
    /*
1751
     * Check first for directly registered encoding names
1752
     */
1753
593k
    for (i = 0;i < 99;i++) {
1754
592k
        upper[i] = (char) toupper((unsigned char) name[i]);
1755
592k
  if (upper[i] == 0) break;
1756
592k
    }
1757
72.5k
    upper[i] = 0;
1758
1759
572k
    for (i = 0; i < (int) NUM_DEFAULT_HANDLERS; i++) {
1760
520k
        if (strcmp(upper, defaultHandlers[i].name) == 0)
1761
20.3k
            return((xmlCharEncodingHandlerPtr) &defaultHandlers[i]);
1762
520k
    }
1763
1764
52.1k
    if (handlers != NULL) {
1765
0
        for (i = 0;i < nbCharEncodingHandler; i++) {
1766
0
            if (!strcmp(upper, handlers[i]->name)) {
1767
#ifdef DEBUG_ENCODING
1768
                xmlGenericError(xmlGenericErrorContext,
1769
                        "Found registered handler for encoding %s\n", name);
1770
#endif
1771
0
                return(handlers[i]);
1772
0
            }
1773
0
        }
1774
0
    }
1775
1776
52.1k
#ifdef LIBXML_ICONV_ENABLED
1777
    /* check whether iconv can handle this */
1778
52.1k
    icv_in = iconv_open("UTF-8", name);
1779
52.1k
    icv_out = iconv_open(name, "UTF-8");
1780
52.1k
    if (icv_in == (iconv_t) -1) {
1781
4.45k
        icv_in = iconv_open("UTF-8", upper);
1782
4.45k
    }
1783
52.1k
    if (icv_out == (iconv_t) -1) {
1784
4.45k
  icv_out = iconv_open(upper, "UTF-8");
1785
4.45k
    }
1786
52.1k
    if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1787
47.7k
      enc = (xmlCharEncodingHandlerPtr)
1788
47.7k
            xmlMalloc(sizeof(xmlCharEncodingHandler));
1789
47.7k
      if (enc == NULL) {
1790
7
          iconv_close(icv_in);
1791
7
          iconv_close(icv_out);
1792
7
    return(NULL);
1793
7
      }
1794
47.7k
            memset(enc, 0, sizeof(xmlCharEncodingHandler));
1795
47.7k
      enc->name = xmlMemStrdup(name);
1796
47.7k
            if (enc->name == NULL) {
1797
5
                xmlFree(enc);
1798
5
                iconv_close(icv_in);
1799
5
                iconv_close(icv_out);
1800
5
                return(NULL);
1801
5
            }
1802
47.7k
      enc->input = NULL;
1803
47.7k
      enc->output = NULL;
1804
47.7k
      enc->iconv_in = icv_in;
1805
47.7k
      enc->iconv_out = icv_out;
1806
#ifdef DEBUG_ENCODING
1807
            xmlGenericError(xmlGenericErrorContext,
1808
        "Found iconv handler for encoding %s\n", name);
1809
#endif
1810
47.7k
      return enc;
1811
47.7k
    } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1812
0
      xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1813
0
        "iconv : problems with filters for '%s'\n", name);
1814
0
      if (icv_in != (iconv_t) -1)
1815
0
    iconv_close(icv_in);
1816
0
      else
1817
0
    iconv_close(icv_out);
1818
0
    }
1819
4.45k
#endif /* LIBXML_ICONV_ENABLED */
1820
#ifdef LIBXML_ICU_ENABLED
1821
    /* check whether icu can handle this */
1822
    ucv_in = openIcuConverter(name, 1);
1823
    ucv_out = openIcuConverter(name, 0);
1824
    if (ucv_in != NULL && ucv_out != NULL) {
1825
      encu = (xmlCharEncodingHandlerPtr)
1826
             xmlMalloc(sizeof(xmlCharEncodingHandler));
1827
      if (encu == NULL) {
1828
                closeIcuConverter(ucv_in);
1829
                closeIcuConverter(ucv_out);
1830
    return(NULL);
1831
      }
1832
            memset(encu, 0, sizeof(xmlCharEncodingHandler));
1833
      encu->name = xmlMemStrdup(name);
1834
            if (encu->name == NULL) {
1835
                xmlFree(encu);
1836
                closeIcuConverter(ucv_in);
1837
                closeIcuConverter(ucv_out);
1838
                return(NULL);
1839
            }
1840
      encu->input = NULL;
1841
      encu->output = NULL;
1842
      encu->uconv_in = ucv_in;
1843
      encu->uconv_out = ucv_out;
1844
#ifdef DEBUG_ENCODING
1845
            xmlGenericError(xmlGenericErrorContext,
1846
        "Found ICU converter handler for encoding %s\n", name);
1847
#endif
1848
      return encu;
1849
    } else if (ucv_in != NULL || ucv_out != NULL) {
1850
            closeIcuConverter(ucv_in);
1851
            closeIcuConverter(ucv_out);
1852
      xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1853
        "ICU converter : problems with filters for '%s'\n", name);
1854
    }
1855
#endif /* LIBXML_ICU_ENABLED */
1856
1857
#ifdef DEBUG_ENCODING
1858
    xmlGenericError(xmlGenericErrorContext,
1859
      "No handler found for encoding %s\n", name);
1860
#endif
1861
1862
    /*
1863
     * Fallback using the canonical names
1864
     */
1865
4.45k
    alias = xmlParseCharEncoding(norig);
1866
4.45k
    if (alias != XML_CHAR_ENCODING_ERROR) {
1867
1.93k
        const char* canon;
1868
1.93k
        canon = xmlGetCharEncodingName(alias);
1869
1.93k
        if ((canon != NULL) && (strcmp(name, canon))) {
1870
652
      return(xmlFindCharEncodingHandler(canon));
1871
652
        }
1872
1.93k
    }
1873
1874
    /* If "none of the above", give up */
1875
3.80k
    return(NULL);
1876
4.45k
}
1877
1878
/************************************************************************
1879
 *                  *
1880
 *    ICONV based generic conversion functions    *
1881
 *                  *
1882
 ************************************************************************/
1883
1884
#ifdef LIBXML_ICONV_ENABLED
1885
/**
 * xmlIconvWrapper:
 * @cd:   iconv converter data structure
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of input bytes
 * @inlen:  the length of @in
 *
 * Run a single iconv() call over @in and translate its outcome into the
 * libxml2 conversion return codes.
 *
 * Returns 0 if success, or
 *     -1 by lack of space (errno E2BIG) or if one of the pointer
 *        arguments is NULL, or
 *     -2 if the transcoding fails (errno EILSEQ), or
 *     -3 for an incomplete trailing sequence (errno EINVAL) and for any
 *        other failure.
 *
 * On return @inlen holds the number of octets consumed and @outlen the
 * number of octets produced. When a pointer argument is NULL, @outlen is
 * reset to 0 (if available) and @inlen is left untouched.
 */
static int
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    const char *src = (const char *) in;
    char *dst = (char *) out;
    size_t srcLeft, dstLeft;
    size_t rc;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
        if (outlen != NULL)
            *outlen = 0;
        return(-1);
    }

    srcLeft = *inlen;
    dstLeft = *outlen;
    /*
     * Some versions take const, other versions take non-const input.
     */
    rc = iconv(cd, (void *) &src, &srcLeft, &dst, &dstLeft);
    /* Turn "bytes left" into "bytes consumed" / "bytes produced". */
    *inlen -= srcLeft;
    *outlen -= dstLeft;

    if ((srcLeft == 0) && (rc != (size_t) -1))
        return 0;

#ifdef EILSEQ
    if (errno == EILSEQ)
        return -2;
#endif
#ifdef E2BIG
    if (errno == E2BIG)
        return -1;
#endif
    /* EINVAL (truncated input) and every other error map to -3. */
    return -3;
}
1945
#endif /* LIBXML_ICONV_ENABLED */
1946
1947
/************************************************************************
1948
 *                  *
1949
 *    ICU based generic conversion functions    *
1950
 *                  *
1951
 ************************************************************************/
1952
1953
#ifdef LIBXML_ICU_ENABLED
1954
/**
1955
 * xmlUconvWrapper:
1956
 * @cd: ICU uconverter data structure
1957
 * @toUnicode : non-zero if toUnicode. 0 otherwise.
1958
 * @out:  a pointer to an array of bytes to store the result
1959
 * @outlen:  the length of @out
1960
 * @in:  a pointer to an array of input bytes
1961
 * @inlen:  the length of @in
1962
 * @flush: if true, indicates end of input
1963
 *
1964
 * Returns 0 if success, or
1965
 *     -1 by lack of space, or
1966
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
1967
 *        the result of transformation can't fit into the encoding we want), or
1968
 *     -3 if there the last byte can't form a single output char.
1969
 *
1970
 * The value of @inlen after return is the number of octets consumed
1971
 *     as the return value is positive, else unpredictable.
1972
 * The value of @outlen after return is the number of octets produced.
1973
 */
1974
static int
1975
xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
1976
                const unsigned char *in, int *inlen, int flush) {
1977
    const char *ucv_in = (const char *) in;
1978
    char *ucv_out = (char *) out;
1979
    UErrorCode err = U_ZERO_ERROR;
1980
1981
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1982
        if (outlen != NULL) *outlen = 0;
1983
        return(-1);
1984
    }
1985
1986
    if (toUnicode) {
1987
        /* encoding => UTF-16 => UTF-8 */
1988
        ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
1989
                       &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1990
                       &cd->pivot_source, &cd->pivot_target,
1991
                       cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1992
    } else {
1993
        /* UTF-8 => UTF-16 => encoding */
1994
        ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
1995
                       &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1996
                       &cd->pivot_source, &cd->pivot_target,
1997
                       cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1998
    }
1999
    *inlen = ucv_in - (const char*) in;
2000
    *outlen = ucv_out - (char *) out;
2001
    if (U_SUCCESS(err)) {
2002
        /* reset pivot buf if this is the last call for input (flush==TRUE) */
2003
        if (flush)
2004
            cd->pivot_source = cd->pivot_target = cd->pivot_buf;
2005
        return 0;
2006
    }
2007
    if (err == U_BUFFER_OVERFLOW_ERROR)
2008
        return -1;
2009
    if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
2010
        return -2;
2011
    return -3;
2012
}
2013
#endif /* LIBXML_ICU_ENABLED */
2014
2015
/************************************************************************
2016
 *                  *
2017
 *    The real API used by libxml for on-the-fly conversion *
2018
 *                  *
2019
 ************************************************************************/
2020
2021
/**
2022
 * xmlEncInputChunk:
2023
 * @handler:  encoding handler
2024
 * @out:  a pointer to an array of bytes to store the result
2025
 * @outlen:  the length of @out
2026
 * @in:  a pointer to an array of input bytes
2027
 * @inlen:  the length of @in
2028
 * @flush:  flush (ICU-related)
2029
 *
2030
 * Returns 0 if success, or
2031
 *     -1 by lack of space, or
2032
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2033
 *        the result of transformation can't fit into the encoding we want), or
2034
 *     -3 if there the last byte can't form a single output char.
2035
 *
2036
 * The value of @inlen after return is the number of octets consumed
2037
 *     as the return value is 0, else unpredictable.
2038
 * The value of @outlen after return is the number of octets produced.
2039
 */
2040
int
2041
xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2042
152k
                 int *outlen, const unsigned char *in, int *inlen, int flush) {
2043
152k
    int ret;
2044
152k
    (void)flush;
2045
2046
152k
    if (handler->input != NULL) {
2047
63.7k
        ret = handler->input(out, outlen, in, inlen);
2048
63.7k
        if (ret > 0)
2049
22.4k
           ret = 0;
2050
63.7k
    }
2051
88.2k
#ifdef LIBXML_ICONV_ENABLED
2052
88.2k
    else if (handler->iconv_in != NULL) {
2053
87.9k
        ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
2054
87.9k
    }
2055
387
#endif /* LIBXML_ICONV_ENABLED */
2056
#ifdef LIBXML_ICU_ENABLED
2057
    else if (handler->uconv_in != NULL) {
2058
        ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen,
2059
                              flush);
2060
    }
2061
#endif /* LIBXML_ICU_ENABLED */
2062
387
    else {
2063
387
        *outlen = 0;
2064
387
        *inlen = 0;
2065
387
        ret = -2;
2066
387
    }
2067
2068
152k
    return(ret);
2069
152k
}
2070
2071
/**
2072
 * xmlEncOutputChunk:
2073
 * @handler:  encoding handler
2074
 * @out:  a pointer to an array of bytes to store the result
2075
 * @outlen:  the length of @out
2076
 * @in:  a pointer to an array of input bytes
2077
 * @inlen:  the length of @in
2078
 *
2079
 * Returns 0 if success, or
2080
 *     -1 by lack of space, or
2081
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2082
 *        the result of transformation can't fit into the encoding we want), or
2083
 *     -3 if there the last byte can't form a single output char.
2084
 *     -4 if no output function was found.
2085
 *
2086
 * The value of @inlen after return is the number of octets consumed
2087
 *     as the return value is 0, else unpredictable.
2088
 * The value of @outlen after return is the number of octets produced.
2089
 */
2090
static int
2091
xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2092
10.4M
                  int *outlen, const unsigned char *in, int *inlen) {
2093
10.4M
    int ret;
2094
2095
10.4M
    if (handler->output != NULL) {
2096
923k
        ret = handler->output(out, outlen, in, inlen);
2097
923k
        if (ret > 0)
2098
462k
           ret = 0;
2099
923k
    }
2100
9.51M
#ifdef LIBXML_ICONV_ENABLED
2101
9.51M
    else if (handler->iconv_out != NULL) {
2102
9.51M
        ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
2103
9.51M
    }
2104
0
#endif /* LIBXML_ICONV_ENABLED */
2105
#ifdef LIBXML_ICU_ENABLED
2106
    else if (handler->uconv_out != NULL) {
2107
        ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen,
2108
                              1);
2109
    }
2110
#endif /* LIBXML_ICU_ENABLED */
2111
0
    else {
2112
0
        *outlen = 0;
2113
0
        *inlen = 0;
2114
0
        ret = -4;
2115
0
    }
2116
2117
10.4M
    return(ret);
2118
10.4M
}
2119
2120
/**
2121
 * xmlCharEncFirstLine:
2122
 * @handler:  char encoding transformation data structure
2123
 * @out:  an xmlBuffer for the output.
2124
 * @in:  an xmlBuffer for the input
2125
 *
2126
 * DEPERECATED: Don't use.
2127
 */
2128
int
2129
xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2130
0
                    xmlBufferPtr in) {
2131
0
    return(xmlCharEncInFunc(handler, out, in));
2132
0
}
2133
2134
/**
2135
 * xmlCharEncInput:
2136
 * @input: a parser input buffer
2137
 * @flush: try to flush all the raw buffer
2138
 *
2139
 * Generic front-end for the encoding handler on parser input
2140
 *
2141
 * Returns the number of byte written if success, or
2142
 *     -1 general error
2143
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2144
 *        the result of transformation can't fit into the encoding we want), or
2145
 */
2146
int
2147
xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
2148
4.47M
{
2149
4.47M
    int ret;
2150
4.47M
    size_t written;
2151
4.47M
    size_t toconv;
2152
4.47M
    int c_in;
2153
4.47M
    int c_out;
2154
4.47M
    xmlBufPtr in;
2155
4.47M
    xmlBufPtr out;
2156
2157
4.47M
    if ((input == NULL) || (input->encoder == NULL) ||
2158
4.47M
        (input->buffer == NULL) || (input->raw == NULL))
2159
12
        return (-1);
2160
4.47M
    out = input->buffer;
2161
4.47M
    in = input->raw;
2162
2163
4.47M
    toconv = xmlBufUse(in);
2164
4.47M
    if (toconv == 0)
2165
4.32M
        return (0);
2166
151k
    if ((toconv > 64 * 1024) && (flush == 0))
2167
1.88k
        toconv = 64 * 1024;
2168
151k
    written = xmlBufAvail(out);
2169
151k
    if (toconv * 2 >= written) {
2170
45.4k
        if (xmlBufGrow(out, toconv * 2) < 0)
2171
5
            return (-1);
2172
45.4k
        written = xmlBufAvail(out);
2173
45.4k
    }
2174
151k
    if ((written > 128 * 1024) && (flush == 0))
2175
1.90k
        written = 128 * 1024;
2176
2177
151k
    c_in = toconv;
2178
151k
    c_out = written;
2179
151k
    ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2180
151k
                           xmlBufContent(in), &c_in, flush);
2181
151k
    xmlBufShrink(in, c_in);
2182
151k
    xmlBufAddLen(out, c_out);
2183
151k
    if (ret == -1)
2184
10.9k
        ret = -3;
2185
2186
151k
    switch (ret) {
2187
96.5k
        case 0:
2188
#ifdef DEBUG_ENCODING
2189
            xmlGenericError(xmlGenericErrorContext,
2190
                            "converted %d bytes to %d bytes of input\n",
2191
                            c_in, c_out);
2192
#endif
2193
96.5k
            break;
2194
0
        case -1:
2195
#ifdef DEBUG_ENCODING
2196
            xmlGenericError(xmlGenericErrorContext,
2197
                         "converted %d bytes to %d bytes of input, %d left\n",
2198
                            c_in, c_out, (int)xmlBufUse(in));
2199
#endif
2200
0
            break;
2201
12.1k
        case -3:
2202
#ifdef DEBUG_ENCODING
2203
            xmlGenericError(xmlGenericErrorContext,
2204
                        "converted %d bytes to %d bytes of input, %d left\n",
2205
                            c_in, c_out, (int)xmlBufUse(in));
2206
#endif
2207
12.1k
            break;
2208
42.5k
        case -2: {
2209
42.5k
            char buf[50];
2210
42.5k
            const xmlChar *content = xmlBufContent(in);
2211
2212
42.5k
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2213
42.5k
         content[0], content[1],
2214
42.5k
         content[2], content[3]);
2215
42.5k
      buf[49] = 0;
2216
42.5k
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2217
42.5k
        "input conversion failed due to input error, bytes %s\n",
2218
42.5k
               buf);
2219
42.5k
        }
2220
151k
    }
2221
    /*
2222
     * Ignore when input buffer is not on a boundary
2223
     */
2224
151k
    if (ret == -3)
2225
12.1k
        ret = 0;
2226
151k
    return (c_out? c_out : ret);
2227
151k
}
2228
2229
/**
2230
 * xmlCharEncInFunc:
2231
 * @handler:  char encoding transformation data structure
2232
 * @out:  an xmlBuffer for the output.
2233
 * @in:  an xmlBuffer for the input
2234
 *
2235
 * Generic front-end for the encoding handler input function
2236
 *
2237
 * Returns the number of byte written if success, or
2238
 *     -1 general error
2239
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2240
 *        the result of transformation can't fit into the encoding we want), or
2241
 */
2242
int
2243
xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2244
                 xmlBufferPtr in)
2245
0
{
2246
0
    int ret;
2247
0
    int written;
2248
0
    int toconv;
2249
2250
0
    if (handler == NULL)
2251
0
        return (-1);
2252
0
    if (out == NULL)
2253
0
        return (-1);
2254
0
    if (in == NULL)
2255
0
        return (-1);
2256
2257
0
    toconv = in->use;
2258
0
    if (toconv == 0)
2259
0
        return (0);
2260
0
    written = out->size - out->use -1; /* count '\0' */
2261
0
    if (toconv * 2 >= written) {
2262
0
        xmlBufferGrow(out, out->size + toconv * 2);
2263
0
        written = out->size - out->use - 1;
2264
0
    }
2265
0
    ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2266
0
                           in->content, &toconv, 1);
2267
0
    xmlBufferShrink(in, toconv);
2268
0
    out->use += written;
2269
0
    out->content[out->use] = 0;
2270
0
    if (ret == -1)
2271
0
        ret = -3;
2272
2273
0
    switch (ret) {
2274
0
        case 0:
2275
#ifdef DEBUG_ENCODING
2276
            xmlGenericError(xmlGenericErrorContext,
2277
                            "converted %d bytes to %d bytes of input\n",
2278
                            toconv, written);
2279
#endif
2280
0
            break;
2281
0
        case -1:
2282
#ifdef DEBUG_ENCODING
2283
            xmlGenericError(xmlGenericErrorContext,
2284
                         "converted %d bytes to %d bytes of input, %d left\n",
2285
                            toconv, written, in->use);
2286
#endif
2287
0
            break;
2288
0
        case -3:
2289
#ifdef DEBUG_ENCODING
2290
            xmlGenericError(xmlGenericErrorContext,
2291
                        "converted %d bytes to %d bytes of input, %d left\n",
2292
                            toconv, written, in->use);
2293
#endif
2294
0
            break;
2295
0
        case -2: {
2296
0
            char buf[50];
2297
2298
0
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2299
0
         in->content[0], in->content[1],
2300
0
         in->content[2], in->content[3]);
2301
0
      buf[49] = 0;
2302
0
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2303
0
        "input conversion failed due to input error, bytes %s\n",
2304
0
               buf);
2305
0
        }
2306
0
    }
2307
    /*
2308
     * Ignore when input buffer is not on a boundary
2309
     */
2310
0
    if (ret == -3)
2311
0
        ret = 0;
2312
0
    return (written? written : ret);
2313
0
}
2314
2315
#ifdef LIBXML_OUTPUT_ENABLED
2316
/**
2317
 * xmlCharEncOutput:
2318
 * @output: a parser output buffer
2319
 * @init: is this an initialization call without data
2320
 *
2321
 * Generic front-end for the encoding handler on parser output
2322
 * a first call with @init == 1 has to be made first to initiate the
2323
 * output in case of non-stateless encoding needing to initiate their
2324
 * state or the output (like the BOM in UTF16).
2325
 * In case of UTF8 sequence conversion errors for the given encoder,
2326
 * the content will be automatically remapped to a CharRef sequence.
2327
 *
2328
 * Returns the number of byte written if success, or
2329
 *     -1 general error
2330
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2331
 *        the result of transformation can't fit into the encoding we want), or
2332
 */
2333
int
2334
xmlCharEncOutput(xmlOutputBufferPtr output, int init)
2335
6.93k
{
2336
6.93k
    int ret;
2337
6.93k
    size_t written;
2338
6.93k
    int writtentot = 0;
2339
6.93k
    size_t toconv;
2340
6.93k
    int c_in;
2341
6.93k
    int c_out;
2342
6.93k
    xmlBufPtr in;
2343
6.93k
    xmlBufPtr out;
2344
2345
6.93k
    if ((output == NULL) || (output->encoder == NULL) ||
2346
6.93k
        (output->buffer == NULL) || (output->conv == NULL))
2347
0
        return (-1);
2348
6.93k
    out = output->conv;
2349
6.93k
    in = output->buffer;
2350
2351
5.22M
retry:
2352
2353
5.22M
    written = xmlBufAvail(out);
2354
2355
    /*
2356
     * First specific handling of the initialization call
2357
     */
2358
5.22M
    if (init) {
2359
268
        c_in = 0;
2360
268
        c_out = written;
2361
        /* TODO: Check return value. */
2362
268
        xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2363
268
                          NULL, &c_in);
2364
268
        xmlBufAddLen(out, c_out);
2365
#ifdef DEBUG_ENCODING
2366
  xmlGenericError(xmlGenericErrorContext,
2367
    "initialized encoder\n");
2368
#endif
2369
268
        return(c_out);
2370
268
    }
2371
2372
    /*
2373
     * Conversion itself.
2374
     */
2375
5.22M
    toconv = xmlBufUse(in);
2376
5.22M
    if (toconv == 0)
2377
996
        return (writtentot);
2378
5.22M
    if (toconv > 64 * 1024)
2379
42.6k
        toconv = 64 * 1024;
2380
5.22M
    if (toconv * 4 >= written) {
2381
722
        xmlBufGrow(out, toconv * 4);
2382
722
        written = xmlBufAvail(out);
2383
722
    }
2384
5.22M
    if (written > 256 * 1024)
2385
3.38M
        written = 256 * 1024;
2386
2387
5.22M
    c_in = toconv;
2388
5.22M
    c_out = written;
2389
5.22M
    ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2390
5.22M
                            xmlBufContent(in), &c_in);
2391
5.22M
    xmlBufShrink(in, c_in);
2392
5.22M
    xmlBufAddLen(out, c_out);
2393
5.22M
    writtentot += c_out;
2394
5.22M
    if (ret == -1) {
2395
176
        if (c_out > 0) {
2396
            /* Can be a limitation of iconv or uconv */
2397
0
            goto retry;
2398
0
        }
2399
176
        ret = -3;
2400
176
    }
2401
2402
    /*
2403
     * Attempt to handle error cases
2404
     */
2405
5.22M
    switch (ret) {
2406
3.58k
        case 0:
2407
#ifdef DEBUG_ENCODING
2408
      xmlGenericError(xmlGenericErrorContext,
2409
        "converted %d bytes to %d bytes of output\n",
2410
              c_in, c_out);
2411
#endif
2412
3.58k
      break;
2413
0
        case -1:
2414
#ifdef DEBUG_ENCODING
2415
      xmlGenericError(xmlGenericErrorContext,
2416
        "output conversion failed by lack of space\n");
2417
#endif
2418
0
      break;
2419
2.08k
        case -3:
2420
#ifdef DEBUG_ENCODING
2421
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2422
              c_in, c_out, (int) xmlBufUse(in));
2423
#endif
2424
2.08k
      break;
2425
0
        case -4:
2426
0
            xmlEncodingErr(XML_I18N_NO_OUTPUT,
2427
0
                           "xmlCharEncOutFunc: no output function !\n", NULL);
2428
0
            ret = -1;
2429
0
            break;
2430
5.21M
        case -2: {
2431
5.21M
      xmlChar charref[20];
2432
5.21M
      int len = xmlBufUse(in);
2433
5.21M
            xmlChar *content = xmlBufContent(in);
2434
5.21M
      int cur, charrefLen;
2435
2436
5.21M
      cur = xmlGetUTF8Char(content, &len);
2437
5.21M
      if (cur <= 0)
2438
0
                break;
2439
2440
#ifdef DEBUG_ENCODING
2441
            xmlGenericError(xmlGenericErrorContext,
2442
                    "handling output conversion error\n");
2443
            xmlGenericError(xmlGenericErrorContext,
2444
                    "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2445
                    content[0], content[1],
2446
                    content[2], content[3]);
2447
#endif
2448
            /*
2449
             * Removes the UTF8 sequence, and replace it by a charref
2450
             * and continue the transcoding phase, hoping the error
2451
             * did not mangle the encoder state.
2452
             */
2453
5.21M
            charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2454
5.21M
                             "&#%d;", cur);
2455
5.21M
            xmlBufShrink(in, len);
2456
5.21M
            xmlBufGrow(out, charrefLen * 4);
2457
5.21M
            c_out = xmlBufAvail(out);
2458
5.21M
            c_in = charrefLen;
2459
5.21M
            ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2460
5.21M
                                    charref, &c_in);
2461
2462
5.21M
      if ((ret < 0) || (c_in != charrefLen)) {
2463
0
    char buf[50];
2464
2465
0
    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2466
0
       content[0], content[1],
2467
0
       content[2], content[3]);
2468
0
    buf[49] = 0;
2469
0
    xmlEncodingErr(XML_I18N_CONV_FAILED,
2470
0
        "output conversion failed due to conv error, bytes %s\n",
2471
0
             buf);
2472
0
    content[0] = ' ';
2473
0
                break;
2474
0
      }
2475
2476
5.21M
            xmlBufAddLen(out, c_out);
2477
5.21M
            writtentot += c_out;
2478
5.21M
            goto retry;
2479
5.21M
  }
2480
5.22M
    }
2481
5.66k
    return(writtentot ? writtentot : ret);
2482
5.22M
}
2483
#endif
2484
2485
/**
2486
 * xmlCharEncOutFunc:
2487
 * @handler:  char encoding transformation data structure
2488
 * @out:  an xmlBuffer for the output.
2489
 * @in:  an xmlBuffer for the input
2490
 *
2491
 * Generic front-end for the encoding handler output function
2492
 * a first call with @in == NULL has to be made firs to initiate the
2493
 * output in case of non-stateless encoding needing to initiate their
2494
 * state or the output (like the BOM in UTF16).
2495
 * In case of UTF8 sequence conversion errors for the given encoder,
2496
 * the content will be automatically remapped to a CharRef sequence.
2497
 *
2498
 * Returns the number of byte written if success, or
2499
 *     -1 general error
2500
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2501
 *        the result of transformation can't fit into the encoding we want), or
2502
 */
2503
int
2504
xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2505
0
                  xmlBufferPtr in) {
2506
0
    int ret;
2507
0
    int written;
2508
0
    int writtentot = 0;
2509
0
    int toconv;
2510
2511
0
    if (handler == NULL) return(-1);
2512
0
    if (out == NULL) return(-1);
2513
2514
0
retry:
2515
2516
0
    written = out->size - out->use;
2517
2518
0
    if (written > 0)
2519
0
  written--; /* Gennady: count '/0' */
2520
2521
    /*
2522
     * First specific handling of in = NULL, i.e. the initialization call
2523
     */
2524
0
    if (in == NULL) {
2525
0
        toconv = 0;
2526
        /* TODO: Check return value. */
2527
0
        xmlEncOutputChunk(handler, &out->content[out->use], &written,
2528
0
                          NULL, &toconv);
2529
0
        out->use += written;
2530
0
        out->content[out->use] = 0;
2531
#ifdef DEBUG_ENCODING
2532
  xmlGenericError(xmlGenericErrorContext,
2533
    "initialized encoder\n");
2534
#endif
2535
0
        return(0);
2536
0
    }
2537
2538
    /*
2539
     * Conversion itself.
2540
     */
2541
0
    toconv = in->use;
2542
0
    if (toconv == 0)
2543
0
  return(0);
2544
0
    if (toconv * 4 >= written) {
2545
0
        xmlBufferGrow(out, toconv * 4);
2546
0
  written = out->size - out->use - 1;
2547
0
    }
2548
0
    ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2549
0
                            in->content, &toconv);
2550
0
    xmlBufferShrink(in, toconv);
2551
0
    out->use += written;
2552
0
    writtentot += written;
2553
0
    out->content[out->use] = 0;
2554
0
    if (ret == -1) {
2555
0
        if (written > 0) {
2556
            /* Can be a limitation of iconv or uconv */
2557
0
            goto retry;
2558
0
        }
2559
0
        ret = -3;
2560
0
    }
2561
2562
    /*
2563
     * Attempt to handle error cases
2564
     */
2565
0
    switch (ret) {
2566
0
        case 0:
2567
#ifdef DEBUG_ENCODING
2568
      xmlGenericError(xmlGenericErrorContext,
2569
        "converted %d bytes to %d bytes of output\n",
2570
              toconv, written);
2571
#endif
2572
0
      break;
2573
0
        case -1:
2574
#ifdef DEBUG_ENCODING
2575
      xmlGenericError(xmlGenericErrorContext,
2576
        "output conversion failed by lack of space\n");
2577
#endif
2578
0
      break;
2579
0
        case -3:
2580
#ifdef DEBUG_ENCODING
2581
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2582
              toconv, written, in->use);
2583
#endif
2584
0
      break;
2585
0
        case -4:
2586
0
      xmlEncodingErr(XML_I18N_NO_OUTPUT,
2587
0
               "xmlCharEncOutFunc: no output function !\n", NULL);
2588
0
      ret = -1;
2589
0
            break;
2590
0
        case -2: {
2591
0
      xmlChar charref[20];
2592
0
      int len = in->use;
2593
0
      const xmlChar *utf = (const xmlChar *) in->content;
2594
0
      int cur, charrefLen;
2595
2596
0
      cur = xmlGetUTF8Char(utf, &len);
2597
0
      if (cur <= 0)
2598
0
                break;
2599
2600
#ifdef DEBUG_ENCODING
2601
            xmlGenericError(xmlGenericErrorContext,
2602
                    "handling output conversion error\n");
2603
            xmlGenericError(xmlGenericErrorContext,
2604
                    "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2605
                    in->content[0], in->content[1],
2606
                    in->content[2], in->content[3]);
2607
#endif
2608
            /*
2609
             * Removes the UTF8 sequence, and replace it by a charref
2610
             * and continue the transcoding phase, hoping the error
2611
             * did not mangle the encoder state.
2612
             */
2613
0
            charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2614
0
                             "&#%d;", cur);
2615
0
            xmlBufferShrink(in, len);
2616
0
            xmlBufferGrow(out, charrefLen * 4);
2617
0
      written = out->size - out->use - 1;
2618
0
            toconv = charrefLen;
2619
0
            ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2620
0
                                    charref, &toconv);
2621
2622
0
      if ((ret < 0) || (toconv != charrefLen)) {
2623
0
    char buf[50];
2624
2625
0
    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2626
0
       in->content[0], in->content[1],
2627
0
       in->content[2], in->content[3]);
2628
0
    buf[49] = 0;
2629
0
    xmlEncodingErr(XML_I18N_CONV_FAILED,
2630
0
        "output conversion failed due to conv error, bytes %s\n",
2631
0
             buf);
2632
0
    in->content[0] = ' ';
2633
0
          break;
2634
0
      }
2635
2636
0
            out->use += written;
2637
0
            writtentot += written;
2638
0
            out->content[out->use] = 0;
2639
0
            goto retry;
2640
0
  }
2641
0
    }
2642
0
    return(writtentot ? writtentot : ret);
2643
0
}
2644
2645
/**
2646
 * xmlCharEncCloseFunc:
2647
 * @handler:  char encoding transformation data structure
2648
 *
2649
 * Generic front-end for encoding handler close function
2650
 *
2651
 * Returns 0 if success, or -1 in case of error
2652
 */
2653
int
2654
70.5k
xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2655
70.5k
    int ret = 0;
2656
70.5k
    int tofree = 0;
2657
70.5k
    int i = 0;
2658
2659
70.5k
    if (handler == NULL) return(-1);
2660
2661
538k
    for (i = 0; i < (int) NUM_DEFAULT_HANDLERS; i++) {
2662
490k
        if (handler == &defaultHandlers[i])
2663
22.8k
            return(0);
2664
490k
    }
2665
2666
47.7k
    if (handlers != NULL) {
2667
0
        for (i = 0;i < nbCharEncodingHandler; i++) {
2668
0
            if (handler == handlers[i])
2669
0
                return(0);
2670
0
  }
2671
0
    }
2672
47.7k
#ifdef LIBXML_ICONV_ENABLED
2673
    /*
2674
     * Iconv handlers can be used only once, free the whole block.
2675
     * and the associated icon resources.
2676
     */
2677
47.7k
    if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
2678
47.7k
        tofree = 1;
2679
47.7k
  if (handler->iconv_out != NULL) {
2680
47.7k
      if (iconv_close(handler->iconv_out))
2681
0
    ret = -1;
2682
47.7k
      handler->iconv_out = NULL;
2683
47.7k
  }
2684
47.7k
  if (handler->iconv_in != NULL) {
2685
47.7k
      if (iconv_close(handler->iconv_in))
2686
0
    ret = -1;
2687
47.7k
      handler->iconv_in = NULL;
2688
47.7k
  }
2689
47.7k
    }
2690
47.7k
#endif /* LIBXML_ICONV_ENABLED */
2691
#ifdef LIBXML_ICU_ENABLED
2692
    if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) {
2693
        tofree = 1;
2694
  if (handler->uconv_out != NULL) {
2695
      closeIcuConverter(handler->uconv_out);
2696
      handler->uconv_out = NULL;
2697
  }
2698
  if (handler->uconv_in != NULL) {
2699
      closeIcuConverter(handler->uconv_in);
2700
      handler->uconv_in = NULL;
2701
  }
2702
    }
2703
#endif
2704
47.7k
    if (tofree) {
2705
        /* free up only dynamic handlers iconv/uconv */
2706
47.7k
        if (handler->name != NULL)
2707
47.7k
            xmlFree(handler->name);
2708
47.7k
        handler->name = NULL;
2709
47.7k
        xmlFree(handler);
2710
47.7k
    }
2711
#ifdef DEBUG_ENCODING
2712
    if (ret)
2713
        xmlGenericError(xmlGenericErrorContext,
2714
    "failed to close the encoding handler\n");
2715
    else
2716
        xmlGenericError(xmlGenericErrorContext,
2717
    "closed the encoding handler\n");
2718
#endif
2719
2720
47.7k
    return(ret);
2721
47.7k
}
2722
2723
/**
2724
 * xmlByteConsumed:
2725
 * @ctxt: an XML parser context
2726
 *
2727
 * This function provides the current index of the parser relative
2728
 * to the start of the current entity. This function is computed in
2729
 * bytes from the beginning starting at zero and finishing at the
2730
 * size in byte of the file if parsing a file. The function is
2731
 * of constant cost if the input is UTF-8 but can be costly if run
2732
 * on non-UTF-8 input.
2733
 *
2734
 * Returns the index in bytes from the beginning of the entity or -1
2735
 *         in case the index could not be computed.
2736
 */
2737
long
2738
0
xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2739
0
    xmlParserInputPtr in;
2740
2741
0
    if (ctxt == NULL) return(-1);
2742
0
    in = ctxt->input;
2743
0
    if (in == NULL)  return(-1);
2744
0
    if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2745
0
        unsigned int unused = 0;
2746
0
  xmlCharEncodingHandler * handler = in->buf->encoder;
2747
        /*
2748
   * Encoding conversion, compute the number of unused original
2749
   * bytes from the input not consumed and subtract that from
2750
   * the raw consumed value, this is not a cheap operation
2751
   */
2752
0
        if (in->end - in->cur > 0) {
2753
0
      unsigned char convbuf[32000];
2754
0
      const unsigned char *cur = (const unsigned char *)in->cur;
2755
0
      int toconv = in->end - in->cur, written = 32000;
2756
2757
0
      int ret;
2758
2759
0
            do {
2760
0
                toconv = in->end - cur;
2761
0
                written = 32000;
2762
0
                ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2763
0
                                        cur, &toconv);
2764
0
                if (ret < 0) {
2765
0
                    if (written > 0)
2766
0
                        ret = -2;
2767
0
                    else
2768
0
                        return(-1);
2769
0
                }
2770
0
                unused += written;
2771
0
                cur += toconv;
2772
0
            } while (ret == -2);
2773
0
  }
2774
0
  if (in->buf->rawconsumed < unused)
2775
0
      return(-1);
2776
0
  return(in->buf->rawconsumed - unused);
2777
0
    }
2778
0
    return(in->consumed + (in->cur - in->base));
2779
0
}
2780
2781
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2782
#ifdef LIBXML_ISO8859X_ENABLED
2783
2784
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. At most @outlen bytes are stored in @out; when
 * @out is full the conversion stops without error and the remaining
 * input is left unconsumed.
 *
 * @xlattable starts with 48 index bytes (32 entries selected by the low
 * bits of a 2-byte lead, then 16 entries selected by the low bits of a
 * 3-byte lead) followed by blocks of 64 bytes; a result byte of 0 means
 * the character is not part of the target set.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 * fails (invalid UTF-8 or character not in the target set), -3 if the
 * input stops in the middle of a sequence, or -1 on invalid arguments.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              const unsigned char* const xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;
    int ret = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    /* A negative capacity is handled like an empty output buffer. */
    outend = out + ((*outlen > 0) ? *outlen : 0);
    inend = in + (*inlen);

    while (in < inend) {
        unsigned char d = *in++;
        unsigned char res;

        if (d < 0x80) {
            res = d;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            break;
        } else if (d < 0xE0) {
            unsigned char c;

            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                ret = -3;
                break;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                ret = -2;
                break;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            res = xlattable [48 + c + xlattable [d] * 64];
            if (res == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;

            if (inend - in < 2) {
                /* trailing bytes not in input buffer */
                ret = -3;
                break;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                ret = -2;
                break;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                ret = -2;
                break;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            res = xlattable [48 + c2 + xlattable [48 + c1 +
                            xlattable [32 + d] * 64] * 64];
            if (res == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
        } else {
            /* cannot transcode >= U+010000 */
            ret = -2;
            break;
        }

        /*
         * Never write past the caller's buffer: stop here and leave the
         * current character unconsumed so the caller can retry with
         * more room.
         */
        if (out >= outend)
            break;
        *out++ = res;
        processed = in;
    }

    *outlen = out - outstart;
    *inlen = processed - instart;
    return((ret < 0) ? ret : *outlen);
}
2902
2903
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable:  128 entry table giving the code point of each input
 *                 byte in the range 0x80..0xFF, 0 meaning undefined
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. Bytes >= 0x80 are only converted while more than
 * two bytes of room remain in @out, so the conversion may stop before
 * the end of @in; @inlen tells how far it went.
 *
 * Returns the number of bytes written if success, or -1 if an argument
 * is NULL or an input byte has no mapping in @unicodetable
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* instop;
    unsigned int c;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    instop = inend;

    /*
     * Compare remaining room instead of computing outend - 2, which
     * would point before the buffer when *outlen is smaller than 2.
     */
    while ((in < inend) && (outend - out > 2)) {
        if (*in >= 0x80) {
            c = unicodetable [*in - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
            ++in;
        }
        /* Limit the ASCII run to what still fits in the output. */
        if (instop - in > outend - out) instop = in + (outend - out);
        /* Test the bound first so the byte past the input is never read. */
        while ((in < instop) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }
    /*
     * The main loop stops with at most two bytes of room left; use them
     * for trailing ASCII bytes, which need a single byte each.
     */
    while ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ =  *in++;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (*outlen);
}
2968
2969
2970
/************************************************************************
2971
 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
2972
 ************************************************************************/
2973
2974
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
2975
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2976
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2977
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2978
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2979
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
2980
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
2981
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
2982
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
2983
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
2984
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
2985
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
2986
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
2987
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
2988
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
2989
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
2990
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
2991
};
2992
2993
static const unsigned char xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
2994
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
2995
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2996
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2997
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2998
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2999
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3000
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3001
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3002
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3003
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3004
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3005
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
3006
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
3007
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3008
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
3009
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3010
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
3011
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3012
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3013
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
3014
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
3015
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
3016
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
3017
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3018
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
3019
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
3020
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
3021
};
3022
3023
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
3024
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3025
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3026
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3027
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3028
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
3029
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
3030
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
3031
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
3032
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
3033
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3034
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
3035
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
3036
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
3037
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3038
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
3039
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
3040
};
3041
3042
static const unsigned char xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
3043
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3044
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3045
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3046
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3047
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3048
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3049
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3050
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3051
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3052
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3053
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
3054
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
3055
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
3056
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
3057
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3058
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3059
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
3060
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3061
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3062
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3063
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3064
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3065
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3066
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3067
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
3068
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
3069
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
3070
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3071
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3072
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3073
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
3074
};
3075
3076
static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
3077
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3078
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3079
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3080
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3081
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
3082
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
3083
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
3084
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
3085
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3086
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
3087
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3088
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
3089
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3090
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
3091
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3092
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
3093
};
3094
3095
static const unsigned char xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
3096
    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
3097
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3098
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3099
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3100
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3101
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3102
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3103
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3104
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3105
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
3106
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3107
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3108
    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3109
    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
3110
    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
3111
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
3112
    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
3113
    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
3114
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
3115
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3116
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
3117
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3118
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3119
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3120
    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
3121
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
3122
    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
3123
};
3124
3125
static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
3126
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3127
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3128
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3129
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3130
    0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
3131
    0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
3132
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
3133
    0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
3134
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
3135
    0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
3136
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
3137
    0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
3138
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
3139
    0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
3140
    0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
3141
    0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
3142
};
3143
3144
static const unsigned char xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
3145
    "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3146
    "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3147
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3148
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3149
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3150
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3151
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3152
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3153
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3154
    "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
3155
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3156
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
3157
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3158
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3159
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3160
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3161
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
3162
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3163
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3164
    "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3165
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3166
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3167
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3168
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3169
    "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3170
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3171
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3172
};
3173
3174
static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
3175
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3176
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3177
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3178
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3179
    0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
3180
    0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
3181
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3182
    0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
3183
    0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
3184
    0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3185
    0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3186
    0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3187
    0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3188
    0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3189
    0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3190
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3191
};
3192
3193
static const unsigned char xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
3194
    "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3195
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3196
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3197
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3198
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3199
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3200
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3201
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3202
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3203
    "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3204
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3205
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3206
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3207
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3208
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3209
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3210
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3211
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3212
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3213
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3214
    "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3215
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3216
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3217
};
3218
3219
static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
3220
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3221
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3222
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3223
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3224
    0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3225
    0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3226
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3227
    0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3228
    0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3229
    0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3230
    0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3231
    0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3232
    0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3233
    0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3234
    0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3235
    0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3236
};
3237
3238
/*
 * Byte table for ISO-8859-7, 48 + 7 * 64 = 496 bytes, initialized from
 * string literals of exactly that length (so no terminating NUL is stored).
 * NOTE(review): looks like a 48-byte first-level index followed by seven
 * 64-byte blocks used to map UTF-8 sequences back to ISO-8859-7 bytes,
 * with \x00 meaning "no mapping" -- confirm against UTF8ToISO8859x.
 */
static const unsigned char xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3239
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3240
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3241
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3242
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3243
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3244
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3245
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3246
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3247
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3248
    "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3249
    "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3250
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3251
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3252
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3253
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3254
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3255
    "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3256
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3257
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3258
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3259
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3260
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3261
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3262
    "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3263
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3264
    "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3265
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3266
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3267
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3268
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3269
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3270
};
3271
3272
/*
 * ISO-8859-8 table of 128 16-bit values. The first 32 entries are
 * 0x0080..0x009f in order.
 * NOTE(review): presumably indexed by (input byte - 0x80) to give the
 * Unicode code point, with 0x0000 marking bytes that have no mapping --
 * confirm against the consumer (likely ISO8859xToUTF8).
 */
static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
3273
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3274
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3275
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3276
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3277
    0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3278
    0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3279
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3280
    0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3281
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3282
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3283
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3284
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3285
    0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3286
    0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3287
    0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3288
    0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3289
};
3290
3291
/*
 * Byte table for ISO-8859-8, 48 + 7 * 64 = 496 bytes, initialized from
 * string literals of exactly that length (so no terminating NUL is stored).
 * NOTE(review): looks like a 48-byte first-level index followed by seven
 * 64-byte blocks used to map UTF-8 sequences back to ISO-8859-8 bytes,
 * with \x00 meaning "no mapping" -- confirm against UTF8ToISO8859x.
 */
static const unsigned char xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3292
    "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3293
    "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3294
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3295
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3296
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3297
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3298
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3299
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3300
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3301
    "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3302
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3303
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3304
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3305
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3306
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3307
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3308
    "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3309
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3310
    "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3311
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3312
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3313
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3314
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3315
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3316
    "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3317
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3318
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3319
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3320
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3321
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3322
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3323
};
3324
3325
/*
 * ISO-8859-9 table of 128 16-bit values. The first 32 entries are
 * 0x0080..0x009f in order; every entry is non-zero.
 * NOTE(review): presumably indexed by (input byte - 0x80) to give the
 * Unicode code point -- confirm against the consumer (likely
 * ISO8859xToUTF8).
 */
static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
3326
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3327
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3328
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3329
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3330
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3331
    0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3332
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3333
    0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3334
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3335
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3336
    0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3337
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3338
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3339
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3340
    0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3341
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3342
};
3343
3344
/*
 * Byte table for ISO-8859-9, 48 + 5 * 64 = 368 bytes, initialized from
 * string literals of exactly that length (so no terminating NUL is stored).
 * NOTE(review): looks like a 48-byte first-level index followed by five
 * 64-byte blocks used to map UTF-8 sequences back to ISO-8859-9 bytes,
 * with \x00 meaning "no mapping" -- confirm against UTF8ToISO8859x.
 */
static const unsigned char xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3345
    "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3346
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3347
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3348
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3349
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3350
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3351
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3352
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3353
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3354
    "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3355
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3356
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3357
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3358
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3359
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3360
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3361
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3362
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3363
    "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3364
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3365
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3366
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3367
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3368
};
3369
3370
/*
 * ISO-8859-10 table of 128 16-bit values. The first 32 entries are
 * 0x0080..0x009f in order; every entry is non-zero.
 * NOTE(review): presumably indexed by (input byte - 0x80) to give the
 * Unicode code point -- confirm against the consumer (likely
 * ISO8859xToUTF8).
 */
static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3371
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3372
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3373
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3374
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3375
    0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3376
    0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3377
    0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3378
    0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3379
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3380
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3381
    0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3382
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3383
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3384
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3385
    0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3386
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3387
};
3388
3389
/*
 * Byte table for ISO-8859-10, 48 + 7 * 64 = 496 bytes, initialized from
 * string literals of exactly that length (so no terminating NUL is stored).
 * NOTE(review): looks like a 48-byte first-level index followed by seven
 * 64-byte blocks used to map UTF-8 sequences back to ISO-8859-10 bytes,
 * with \x00 meaning "no mapping" -- confirm against UTF8ToISO8859x.
 */
static const unsigned char xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3390
    "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3391
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3392
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3393
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3394
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3395
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3396
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3397
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3398
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3399
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3400
    "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3401
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3402
    "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3403
    "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3404
    "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3405
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3406
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3407
    "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3408
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3409
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3410
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3411
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3412
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3413
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3414
    "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3415
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3416
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3417
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3418
    "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3419
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3420
    "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3421
};
3422
3423
/*
 * ISO-8859-11 table of 128 16-bit values. The first 32 entries are
 * 0x0080..0x009f in order.
 * NOTE(review): presumably indexed by (input byte - 0x80) to give the
 * Unicode code point, with 0x0000 marking bytes that have no mapping --
 * confirm against the consumer (likely ISO8859xToUTF8).
 */
static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3424
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3425
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3426
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3427
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3428
    0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3429
    0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3430
    0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3431
    0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3432
    0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3433
    0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3434
    0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3435
    0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3436
    0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3437
    0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3438
    0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3439
    0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3440
};
3441
3442
/*
 * Byte table for ISO-8859-11, 48 + 6 * 64 = 432 bytes, initialized from
 * string literals of exactly that length (so no terminating NUL is stored).
 * NOTE(review): looks like a 48-byte first-level index followed by six
 * 64-byte blocks used to map UTF-8 sequences back to ISO-8859-11 bytes,
 * with \x00 meaning "no mapping" -- confirm against UTF8ToISO8859x.
 */
static const unsigned char xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3443
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3444
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3445
    "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3446
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3447
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3448
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3449
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3450
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3451
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3452
    "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3453
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3454
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3455
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3456
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3457
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3458
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3459
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3460
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3461
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3462
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3463
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3464
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3465
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3466
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3467
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3468
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3469
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3470
};
3471
3472
/*
 * ISO-8859-13 table of 128 16-bit values. The first 32 entries are
 * 0x0080..0x009f in order; every entry is non-zero.
 * NOTE(review): presumably indexed by (input byte - 0x80) to give the
 * Unicode code point -- confirm against the consumer (likely
 * ISO8859xToUTF8).
 */
static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3473
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3474
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3475
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3476
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3477
    0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3478
    0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3479
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3480
    0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3481
    0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3482
    0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3483
    0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3484
    0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3485
    0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3486
    0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3487
    0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3488
    0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3489
};
3490
3491
/*
 * Byte table for ISO-8859-13, 48 + 7 * 64 = 496 bytes, initialized from
 * string literals of exactly that length (so no terminating NUL is stored).
 * NOTE(review): looks like a 48-byte first-level index followed by seven
 * 64-byte blocks used to map UTF-8 sequences back to ISO-8859-13 bytes,
 * with \x00 meaning "no mapping" -- confirm against UTF8ToISO8859x.
 */
static const unsigned char xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3492
    "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3493
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3494
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3495
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3496
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3497
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3498
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3499
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3500
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3501
    "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3502
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3503
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3504
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3505
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3506
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3507
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3508
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3509
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3510
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3511
    "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3512
    "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3513
    "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3514
    "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3515
    "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3516
    "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3517
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3518
    "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3519
    "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3520
    "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3521
    "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3522
    "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3523
};
3524
3525
/*
 * ISO-8859-14 table of 128 16-bit values. The first 32 entries are
 * 0x0080..0x009f in order; every entry is non-zero.
 * NOTE(review): presumably indexed by (input byte - 0x80) to give the
 * Unicode code point -- confirm against the consumer (likely
 * ISO8859xToUTF8).
 */
static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
3526
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3527
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3528
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3529
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3530
    0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3531
    0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3532
    0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3533
    0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3534
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3535
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3536
    0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3537
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3538
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3539
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3540
    0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3541
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
3542
};
3543
3544
/*
 * Byte table for ISO-8859-14, 48 + 10 * 64 = 688 bytes, initialized from
 * string literals of exactly that length (so no terminating NUL is stored).
 * NOTE(review): looks like a 48-byte first-level index followed by ten
 * 64-byte blocks used to map UTF-8 sequences back to ISO-8859-14 bytes,
 * with \x00 meaning "no mapping" -- confirm against UTF8ToISO8859x.
 */
static const unsigned char xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
3545
    "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3546
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3547
    "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3548
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3549
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3550
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3551
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3552
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3553
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3554
    "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3555
    "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3556
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3557
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3558
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3559
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3560
    "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3561
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3562
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3563
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3564
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3565
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3566
    "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3567
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3568
    "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3569
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3570
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3571
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3572
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3573
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3574
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3575
    "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3576
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3577
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3578
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3579
    "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3580
    "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3581
    "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3582
    "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3583
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3584
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3585
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3586
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3587
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3588
};
3589
3590
/*
 * ISO-8859-15 table of 128 16-bit values. The first 32 entries are
 * 0x0080..0x009f in order; every entry is non-zero.
 * NOTE(review): presumably indexed by (input byte - 0x80) to give the
 * Unicode code point -- confirm against the consumer (likely
 * ISO8859xToUTF8).
 */
static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
3591
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3592
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3593
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3594
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3595
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3596
    0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3597
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3598
    0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3599
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3600
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3601
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3602
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3603
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3604
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3605
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3606
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
3607
};
3608
3609
/*
 * Byte table for ISO-8859-15, 48 + 6 * 64 = 432 bytes, initialized from
 * string literals of exactly that length (so no terminating NUL is stored).
 * NOTE(review): looks like a 48-byte first-level index followed by six
 * 64-byte blocks used to map UTF-8 sequences back to ISO-8859-15 bytes,
 * with \x00 meaning "no mapping" -- confirm against UTF8ToISO8859x.
 */
static const unsigned char xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
3610
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3611
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3612
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3613
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3614
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3615
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3616
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3617
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3618
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3619
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3620
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3621
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3622
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3623
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3624
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3625
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3626
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3627
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3628
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3629
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3630
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3631
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3632
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3633
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3634
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3635
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3636
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
3637
};
3638
3639
/*
 * ISO-8859-16 table of 128 16-bit values. The first 32 entries are
 * 0x0080..0x009f in order; every entry is non-zero.
 * NOTE(review): presumably indexed by (input byte - 0x80) to give the
 * Unicode code point -- confirm against the consumer (likely
 * ISO8859xToUTF8).
 */
static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
3640
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3641
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3642
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3643
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3644
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3645
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3646
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3647
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3648
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3649
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3650
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3651
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3652
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3653
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3654
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3655
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
3656
};
3657
3658
/*
 * Byte table for ISO-8859-16, 48 + 9 * 64 = 624 bytes, initialized from
 * string literals of exactly that length (so no terminating NUL is stored).
 * NOTE(review): looks like a 48-byte first-level index followed by nine
 * 64-byte blocks used to map UTF-8 sequences back to ISO-8859-16 bytes,
 * with \x00 meaning "no mapping" -- confirm against UTF8ToISO8859x.
 */
static const unsigned char xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
3659
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3660
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3661
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3662
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3663
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3664
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3665
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3666
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3667
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3668
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3669
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3670
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3671
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3672
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3673
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3674
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3675
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3676
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3677
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3678
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3679
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3680
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3681
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3682
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3683
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3684
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3685
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3686
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3687
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3688
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3689
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3690
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3691
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3692
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3693
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3694
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3695
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3696
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3697
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3698
};
3699
3700
3701
/*
3702
 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3703
 */
3704
3705
static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3706
    const unsigned char* in, int *inlen) {
3707
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3708
}
3709
static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3710
    const unsigned char* in, int *inlen) {
3711
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3712
}
3713
3714
static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3715
    const unsigned char* in, int *inlen) {
3716
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3717
}
3718
static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3719
    const unsigned char* in, int *inlen) {
3720
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3721
}
3722
3723
static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3724
    const unsigned char* in, int *inlen) {
3725
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3726
}
3727
static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3728
    const unsigned char* in, int *inlen) {
3729
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3730
}
3731
3732
static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3733
    const unsigned char* in, int *inlen) {
3734
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3735
}
3736
static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3737
    const unsigned char* in, int *inlen) {
3738
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3739
}
3740
3741
static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3742
    const unsigned char* in, int *inlen) {
3743
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3744
}
3745
static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3746
    const unsigned char* in, int *inlen) {
3747
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3748
}
3749
3750
static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3751
    const unsigned char* in, int *inlen) {
3752
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3753
}
3754
static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3755
    const unsigned char* in, int *inlen) {
3756
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3757
}
3758
3759
static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3760
    const unsigned char* in, int *inlen) {
3761
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3762
}
3763
static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3764
    const unsigned char* in, int *inlen) {
3765
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3766
}
3767
3768
static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3769
    const unsigned char* in, int *inlen) {
3770
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3771
}
3772
static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3773
    const unsigned char* in, int *inlen) {
3774
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3775
}
3776
3777
static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3778
    const unsigned char* in, int *inlen) {
3779
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3780
}
3781
static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3782
    const unsigned char* in, int *inlen) {
3783
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3784
}
3785
3786
static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3787
    const unsigned char* in, int *inlen) {
3788
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3789
}
3790
static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3791
    const unsigned char* in, int *inlen) {
3792
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3793
}
3794
3795
static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3796
    const unsigned char* in, int *inlen) {
3797
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3798
}
3799
static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3800
    const unsigned char* in, int *inlen) {
3801
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3802
}
3803
3804
static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3805
    const unsigned char* in, int *inlen) {
3806
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3807
}
3808
static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3809
    const unsigned char* in, int *inlen) {
3810
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3811
}
3812
3813
static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3814
    const unsigned char* in, int *inlen) {
3815
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3816
}
3817
static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3818
    const unsigned char* in, int *inlen) {
3819
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3820
}
3821
3822
static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3823
    const unsigned char* in, int *inlen) {
3824
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3825
}
3826
static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3827
    const unsigned char* in, int *inlen) {
3828
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3829
}
3830
3831
#endif
3832
#endif
3833