Coverage Report

Created: 2023-03-26 06:14

/src/libxml2/encoding.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * encoding.c : implements the encoding conversion functions needed for XML
3
 *
4
 * Related specs:
5
 * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6
 * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7
 * [ISO-10646]    UTF-8 and UTF-16 in Annexes
8
 * [ISO-8859-1]   ISO Latin-1 characters codes.
9
 * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
10
 *                Worldwide Character Encoding -- Version 1.0", Addison-
11
 *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
12
 *                described in Unicode Technical Report #4.
13
 * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
14
 *                Information Interchange, ANSI X3.4-1986.
15
 *
16
 * See Copyright for the status of this software.
17
 *
18
 * daniel@veillard.com
19
 *
20
 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
21
 */
22
23
#define IN_LIBXML
24
#include "libxml.h"
25
26
#include <string.h>
27
#include <limits.h>
28
#include <ctype.h>
29
#include <stdlib.h>
30
31
#ifdef LIBXML_ICONV_ENABLED
32
#include <errno.h>
33
#endif
34
35
#include <libxml/encoding.h>
36
#include <libxml/xmlmemory.h>
37
#ifdef LIBXML_HTML_ENABLED
38
#include <libxml/HTMLparser.h>
39
#endif
40
#include <libxml/globals.h>
41
#include <libxml/xmlerror.h>
42
43
#include "private/buf.h"
44
#include "private/enc.h"
45
#include "private/error.h"
46
47
#ifdef LIBXML_ICU_ENABLED
48
#include <unicode/ucnv.h>
49
/* Size of pivot buffer, same as icu/source/common/ucnv.cpp CHUNK_SIZE */
50
#define ICU_PIVOT_BUF_SIZE 1024
51
typedef struct _uconv_t uconv_t;
52
struct _uconv_t {
53
  UConverter *uconv; /* for conversion between an encoding and UTF-16 */
54
  UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */
55
  UChar      pivot_buf[ICU_PIVOT_BUF_SIZE];
56
  UChar      *pivot_source;
57
  UChar      *pivot_target;
58
};
59
#endif
60
61
/*
 * One entry of the encoding alias table: @alias is an alternative
 * spelling for the encoding called @name.
 */
typedef struct _xmlCharEncodingAlias {
    const char *name;
    const char *alias;
} xmlCharEncodingAlias;
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;

/* Alias table storage, the number of entries in use and its capacity. */
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
static int xmlCharEncodingAliasesNb = 0;
static int xmlCharEncodingAliasesMax = 0;
71
72
#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
73
#if 0
74
#define DEBUG_ENCODING  /* Define this to get encoding traces */
75
#endif
76
#else
77
#endif
78
79
/*
 * Host byte-order flag consulted by the UTF-16 conversion routines in this
 * file: when non-zero they access little-endian 16-bit units natively and
 * assemble big-endian units byte by byte, and the other way round when zero.
 * Starts out as 1.
 * NOTE(review): presumably re-evaluated during library initialisation on
 * big-endian hosts -- confirm where it is assigned.
 */
static int xmlLittleEndian = 1;
80
81
/**
82
 * xmlEncodingErrMemory:
83
 * @extra:  extra information
84
 *
85
 * Handle an out of memory condition
86
 */
87
static void
88
xmlEncodingErrMemory(const char *extra)
89
0
{
90
0
    __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
91
0
}
92
93
/**
94
 * xmlErrEncoding:
95
 * @error:  the error number
96
 * @msg:  the error message
97
 *
98
 * n encoding error
99
 */
100
static void LIBXML_ATTR_FORMAT(2,0)
101
xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
102
16
{
103
16
    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
104
16
                    XML_FROM_I18N, error, XML_ERR_FATAL,
105
16
                    NULL, 0, val, NULL, NULL, 0, 0, msg, val);
106
16
}
107
108
#ifdef LIBXML_ICU_ENABLED
109
static uconv_t*
110
openIcuConverter(const char* name, int toUnicode)
111
{
112
  UErrorCode status = U_ZERO_ERROR;
113
  uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
114
  if (conv == NULL)
115
    return NULL;
116
117
  conv->pivot_source = conv->pivot_buf;
118
  conv->pivot_target = conv->pivot_buf;
119
120
  conv->uconv = ucnv_open(name, &status);
121
  if (U_FAILURE(status))
122
    goto error;
123
124
  status = U_ZERO_ERROR;
125
  if (toUnicode) {
126
    ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
127
                        NULL, NULL, NULL, &status);
128
  }
129
  else {
130
    ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
131
                        NULL, NULL, NULL, &status);
132
  }
133
  if (U_FAILURE(status))
134
    goto error;
135
136
  status = U_ZERO_ERROR;
137
  conv->utf8 = ucnv_open("UTF-8", &status);
138
  if (U_SUCCESS(status))
139
    return conv;
140
141
error:
142
  if (conv->uconv)
143
    ucnv_close(conv->uconv);
144
  xmlFree(conv);
145
  return NULL;
146
}
147
148
static void
149
closeIcuConverter(uconv_t *conv)
150
{
151
  if (conv != NULL) {
152
    ucnv_close(conv->uconv);
153
    ucnv_close(conv->utf8);
154
    xmlFree(conv);
155
  }
156
}
157
#endif /* LIBXML_ICU_ENABLED */
158
159
/************************************************************************
160
 *                  *
161
 *    Conversions To/From UTF8 encoding     *
162
 *                  *
163
 ************************************************************************/
164
165
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out. Conversion stops when either the input is
 * exhausted or @out is full.
 *
 * Returns the number of bytes written if success, or -1 if a byte
 *     outside the ASCII range (>= 0x80) is found.
 * The value of @inlen after return is the number of octets consumed;
 *     on error it is the offset of the offending byte.
 * The value of @outlen after return is the number of octets produced.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* base = in;
    unsigned char* outend = out + *outlen;
    const unsigned char* inend = in + (*inlen);

    /*
     * Every ASCII char maps to exactly one output byte, so the only
     * space requirement is one free byte per char.
     */
    while ((in < inend) && (out < outend)) {
        if (*in >= 0x80) {
            /* not ASCII: report what was converted before this byte */
            *outlen = out - outstart;
            *inlen = in - base;
            return(-1);
        }
        *out++ = *in++;
    }
    *outlen = out - outstart;
    *inlen = in - base;
    return(*outlen);
}
209
210
#ifdef LIBXML_OUTPUT_ENABLED
211
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out. A multi-byte sequence that is cut off by the end
 * of @in is left unconsumed so that the caller can supply the rest later.
 * Calling with @in == NULL is the initialization call and does nothing.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (malformed UTF-8 or a char that has no ASCII equivalent),
 *     or -1 if @out, @outlen or @inlen is NULL.
 * The value of @inlen after return is the number of octets consumed;
 *     on a -2 return it is the offset of the char that failed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c= d; trailing= 0; }
        else if (d < 0xC0)  goto untranslatable; /* trailing byte in leading position */
        else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
        else                goto untranslatable; /* no chance for this in Ascii */

        /* sequence split across input chunks: wait for the rest */
        if (inend - in < trailing) {
            break;
        }

        for ( ; trailing; trailing--) {
            d = *in++;
            /*
             * A byte that is not a continuation byte makes the sequence
             * malformed; it must not be swallowed as part of this char.
             */
            if ((d & 0xC0) != 0x80)
                goto untranslatable;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c >= 0x80)
            goto untranslatable; /* no chance for this in Ascii */
        if (out >= outend)
            break;
        *out++ = c;
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(*outlen);

untranslatable:
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(-2);
}
294
#endif /* LIBXML_OUTPUT_ENABLED */
295
296
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied unchanged, all others
 * become a two byte sequence. Conversion stops at the first char that
 * no longer fits into @out.
 *
 * Returns the number of bytes written if success, or -1 if one of the
 *     arguments is NULL.
 * The value of @inlen after return is the number of octets consumed
 *     if the return value is positive, else unpredictable.
 * The value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* base = in;
    unsigned char* outend;
    const unsigned char* inend;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    outend = out + *outlen;
    inend = in + (*inlen);

    while (in < inend) {
        if (*in < 0x80) {
            /* plain ASCII: one byte */
            if (out >= outend)
                break;
            *out++ = *in++;
        } else {
            /* 0x80..0xFF: always a two byte UTF-8 sequence */
            if (outend - out < 2)
                break;
            *out++ = (((*in) >>  6) & 0x1F) | 0xC0;
            *out++ = ((*in) & 0x3F) | 0x80;
            ++in;
        }
    }
    *outlen = out - outstart;
    *inlen = in - base;
    return(*outlen);
}
344
345
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * Pass-through "conversion" for UTF-8: copies as many bytes as fit,
 * i.e. the smaller of *@outlen and *@inlenb.
 * Calling with @inb == NULL is the initialization call and does nothing.
 *
 * Returns the number of bytes written, or -1 if @out, @outlen or @inlenb
 *     is NULL or the byte count to copy is negative.
 *     The values of *@outlen and *@inlenb after a successful return are
 *     both the number of octets copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int count;

    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);

    if (inb == NULL) {
        /* inb == NULL means output is initialized. */
        *outlen = 0;
        *inlenb = 0;
        return(0);
    }

    count = (*outlen > *inlenb) ? *inlenb : *outlen;
    if (count < 0)
        return(-1);

    /*
     * FIXME: Conversion functions must assure valid UTF-8, so we have
     * to check for UTF-8 validity. Preferably, this converter shouldn't
     * be used at all.
     */
    memcpy(out, inb, count);

    *inlenb = count;
    *outlen = count;
    return(count);
}
391
392
393
#ifdef LIBXML_OUTPUT_ENABLED
394
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out. A multi-byte sequence cut off by the end of @in is
 * left unconsumed. Calling with @in == NULL is the initialization call
 * and does nothing.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (malformed input or a char above 0xFF), or -1 if @out,
 *     @outlen or @inlen is NULL.
 * The value of @inlen after return is the number of octets consumed;
 *     on a -2 return it is the offset of the char that failed.
 * The value of @outlen after return is the number of octets produced.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char *cur;
    const unsigned char *committed;
    const unsigned char *limit;
    unsigned char *dst;
    const unsigned char *dstend;
    unsigned int value, byte;
    int extra;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /* initialization call: nothing to emit */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    cur = in;
    committed = in;
    limit = in + (*inlen);
    dst = out;
    dstend = out + (*outlen);

    while (cur < limit) {
        byte = *cur++;

        /* Decode the lead byte: payload bits and number of followers. */
        if (byte < 0x80) {
            value = byte;
            extra = 0;
        } else if (byte < 0xC0) {
            goto untranslatable;    /* trailing byte in leading position */
        } else if (byte < 0xE0) {
            value = byte & 0x1F;
            extra = 1;
        } else if (byte < 0xF0) {
            value = byte & 0x0F;
            extra = 2;
        } else if (byte < 0xF8) {
            value = byte & 0x07;
            extra = 3;
        } else {
            goto untranslatable;    /* no chance for this in IsoLat1 */
        }

        /* The rest of the sequence has not arrived yet. */
        if (limit - cur < extra)
            break;

        while (extra > 0) {
            byte = *cur++;
            if ((byte & 0xC0) != 0x80)
                goto untranslatable;
            value = (value << 6) | (byte & 0x3F);
            extra--;
        }

        /* value now holds one complete code point */
        if (value > 0xFF)
            goto untranslatable;    /* no chance for this in IsoLat1 */
        if (dst >= dstend)
            break;
        *dst++ = value;
        committed = cur;
    }

    *outlen = dst - out;
    *inlen = committed - in;
    return(*outlen);

untranslatable:
    *outlen = dst - out;
    *inlen = committed - in;
    return(-2);
}
483
#endif /* LIBXML_OUTPUT_ENABLED */
484
485
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an UTF-8
 * block of chars out. The input is read byte by byte, so it may be
 * unaligned and the result does not depend on the host byte order.
 * A trailing odd byte and a high surrogate whose partner has not arrived
 * yet are left unconsumed.
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 *     (if @inb is not a valid UTF-16 string: unpaired surrogate).
 *     The value of *@inlenb after return is the number of octets consumed;
 *     on a -2 return it is the offset of the offending code unit.
 *     The value of *@outlen after return is the number of octets produced.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* in = inb;
    const unsigned char* next;
    const unsigned char* inend;
    unsigned int c, d;
    int need;

    if (*outlen == 0) {
        *inlenb = 0;
        return(0);
    }
    outend = out + *outlen;
    /* only whole 16-bit units are looked at */
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + *inlenb;

    while (inend - in >= 2) {
        c = in[0] | ((unsigned int) in[1] << 8);
        next = in + 2;

        if ((c & 0xF800) == 0xD800) {    /* surrogates */
            if ((c & 0x0400) != 0)       /* low surrogate without a high one */
                goto invalid;
            if (inend - next < 2)        /* handle split multi-byte characters */
                break;
            d = next[0] | ((unsigned int) next[1] << 8);
            next += 2;
            if ((d & 0xFC00) != 0xDC00)
                goto invalid;
            c = 0x10000 + (((c & 0x03FF) << 10) | (d & 0x03FF));
        }

        /* assertion: c is a single UCS-4 value */
        if      (c <    0x80) need = 1;
        else if (c <   0x800) need = 2;
        else if (c < 0x10000) need = 3;
        else                  need = 4;

        /* never emit a partial UTF-8 sequence */
        if (outend - out < need)
            break;

        if (need == 1) {
            *out++ = c;
        } else if (need == 2) {
            *out++ = (c >> 6) | 0xC0;
            *out++ = (c & 0x3F) | 0x80;
        } else if (need == 3) {
            *out++ = (c >> 12) | 0xE0;
            *out++ = ((c >> 6) & 0x3F) | 0x80;
            *out++ = (c & 0x3F) | 0x80;
        } else {
            *out++ = (c >> 18) | 0xF0;
            *out++ = ((c >> 12) & 0x3F) | 0x80;
            *out++ = ((c >> 6) & 0x3F) | 0x80;
            *out++ = (c & 0x3F) | 0x80;
        }
        in = next;
    }
    *outlen = out - outstart;
    *inlenb = in - inb;
    return(*outlen);

invalid:
    *outlen = out - outstart;
    *inlenb = in - inb;
    return(-2);
}
577
578
#ifdef LIBXML_OUTPUT_ENABLED
579
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. No byte order mark is produced. The output is
 * written byte by byte, so @outb may be unaligned and the result does
 * not depend on the host byte order. A multi-byte sequence cut off by
 * the end of @in is left unconsumed. Calling with @in == NULL is the
 * initialization call and does nothing.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen
 *     is NULL, or -2 if the transcoding failed (malformed UTF-8 or a
 *     value above 0x10FFFF).
 *     The value of *@inlen after return is the number of octets consumed;
 *     on a -2 return it is the offset of the char that failed.
 *     The value of *@outlen after return is the number of octets produced.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d, hi, lo;
    int trailing;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend= in + *inlen;
    /* only whole 16-bit units can be stored */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d= *in++;
        if      (d < 0x80)  { c= d; trailing= 0; }
        else if (d < 0xC0)  goto invalid; /* trailing byte in leading position */
        else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
        else                goto invalid; /* no chance for this in UTF-16 */

        /* sequence split across input chunks: wait for the rest */
        if (inend - in < trailing) {
            break;
        }

        for ( ; trailing; trailing--) {
            d = *in++;
            /* a non-continuation byte must not be swallowed silently */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = c & 0xFF;
            *out++ = c >> 8;
        }
        else if (c < 0x110000) {
            /* needs a surrogate pair: two 16-bit units */
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = hi & 0xFF;
            *out++ = hi >> 8;
            *out++ = lo & 0xFF;
            *out++ = lo >> 8;
        }
        else
            goto invalid; /* beyond the range UTF-16 can represent */
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
687
688
/**
 * UTF8ToUTF16:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
 * block of chars out. The initialization call (@in == NULL) writes the
 * little-endian byte order mark FF FE if @outb has room for it; all
 * other calls are forwarded to UTF8ToUTF16LE().
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen
 *     is NULL, or -2 if the transcoding failed.
 */
static int
UTF8ToUTF16(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    /* validate before the BOM path dereferences the pointers */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization, add the Byte Order Mark for UTF-16LE
         */
        if (*outlen >= 2) {
            outb[0] = 0xFF;
            outb[1] = 0xFE;
            *outlen = 2;
            *inlen = 0;
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
		    "Added FFFE Byte Order Mark\n");
#endif
            return(2);
        }
        /* no room for the BOM: nothing written */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    return (UTF8ToUTF16LE(outb, outlen, in, inlen));
}
726
#endif /* LIBXML_OUTPUT_ENABLED */
727
728
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an UTF-8
 * block of chars out. The input is read byte by byte, so it may be
 * unaligned and the result does not depend on the host byte order.
 * A trailing odd byte and a high surrogate whose partner has not arrived
 * yet are left unconsumed.
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 *     (if @inb is not a valid UTF-16 string: unpaired surrogate).
 * The value of *@inlenb after return is the number of octets consumed;
 *     on a -2 return it is the offset of the offending code unit.
 * The value of *@outlen after return is the number of octets produced.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* in = inb;
    const unsigned char* next;
    const unsigned char* inend;
    unsigned int c, d;
    int need;

    if (*outlen == 0) {
        *inlenb = 0;
        return(0);
    }
    outend = out + *outlen;
    /* only whole 16-bit units are looked at */
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + *inlenb;

    while (inend - in >= 2) {
        c = ((unsigned int) in[0] << 8) | in[1];
        next = in + 2;

        if ((c & 0xF800) == 0xD800) {    /* surrogates */
            if ((c & 0x0400) != 0)       /* low surrogate without a high one */
                goto invalid;
            if (inend - next < 2)        /* handle split multi-byte characters */
                break;
            d = ((unsigned int) next[0] << 8) | next[1];
            next += 2;
            if ((d & 0xFC00) != 0xDC00)
                goto invalid;
            c = 0x10000 + (((c & 0x03FF) << 10) | (d & 0x03FF));
        }

        /* assertion: c is a single UCS-4 value */
        if      (c <    0x80) need = 1;
        else if (c <   0x800) need = 2;
        else if (c < 0x10000) need = 3;
        else                  need = 4;

        /* never emit a partial UTF-8 sequence */
        if (outend - out < need)
            break;

        if (need == 1) {
            *out++ = c;
        } else if (need == 2) {
            *out++ = (c >> 6) | 0xC0;
            *out++ = (c & 0x3F) | 0x80;
        } else if (need == 3) {
            *out++ = (c >> 12) | 0xE0;
            *out++ = ((c >> 6) & 0x3F) | 0x80;
            *out++ = (c & 0x3F) | 0x80;
        } else {
            *out++ = (c >> 18) | 0xF0;
            *out++ = ((c >> 12) & 0x3F) | 0x80;
            *out++ = ((c >> 6) & 0x3F) | 0x80;
            *out++ = (c & 0x3F) | 0x80;
        }
        in = next;
    }
    *outlen = out - outstart;
    *inlenb = in - inb;
    return(*outlen);

invalid:
    *outlen = out - outstart;
    *inlenb = in - inb;
    return(-2);
}
820
821
#ifdef LIBXML_OUTPUT_ENABLED
822
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. No byte order mark is produced. The output is
 * written byte by byte, so @outb may be unaligned and the result does
 * not depend on the host byte order. A multi-byte sequence cut off by
 * the end of @in is left unconsumed. Calling with @in == NULL is the
 * initialization call and does nothing.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen
 *     is NULL, or -2 if the transcoding failed (malformed UTF-8 or a
 *     value above 0x10FFFF).
 *     The value of *@inlen after return is the number of octets consumed;
 *     on a -2 return it is the offset of the char that failed.
 *     The value of *@outlen after return is the number of octets produced
 *     (always counted in bytes, also on a -2 return).
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d, hi, lo;
    int trailing;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend= in + *inlen;
    /* only whole 16-bit units can be stored */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d= *in++;
        if      (d < 0x80)  { c= d; trailing= 0; }
        else if (d < 0xC0)  goto invalid; /* trailing byte in leading position */
        else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
        else                goto invalid; /* no chance for this in UTF-16 */

        /* sequence split across input chunks: wait for the rest */
        if (inend - in < trailing) {
            break;
        }

        for ( ; trailing; trailing--) {
            d = *in++;
            /* a non-continuation byte must not be swallowed silently */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = c >> 8;
            *out++ = c & 0xFF;
        }
        else if (c < 0x110000) {
            /* needs a surrogate pair: two 16-bit units */
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = hi >> 8;
            *out++ = hi & 0xFF;
            *out++ = lo >> 8;
            *out++ = lo & 0xFF;
        }
        else
            goto invalid; /* beyond the range UTF-16 can represent */
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    /* out is a byte pointer, so this difference is already in bytes */
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
927
#endif /* LIBXML_OUTPUT_ENABLED */
928
929
/************************************************************************
930
 *                  *
931
 *    Generic encoding handling routines      *
932
 *                  *
933
 ************************************************************************/
934
935
/**
936
 * xmlDetectCharEncoding:
937
 * @in:  a pointer to the first bytes of the XML entity, must be at least
938
 *       2 bytes long (at least 4 if encoding is UTF4 variant).
939
 * @len:  pointer to the length of the buffer
940
 *
941
 * Guess the encoding of the entity using the first bytes of the entity content
942
 * according to the non-normative appendix F of the XML-1.0 recommendation.
943
 *
944
 * Returns one of the XML_CHAR_ENCODING_... values.
945
 */
946
xmlCharEncoding
947
xmlDetectCharEncoding(const unsigned char* in, int len)
948
156
{
949
156
    if (in == NULL)
950
0
        return(XML_CHAR_ENCODING_NONE);
951
156
    if (len >= 4) {
952
156
  if ((in[0] == 0x00) && (in[1] == 0x00) &&
953
156
      (in[2] == 0x00) && (in[3] == 0x3C))
954
2
      return(XML_CHAR_ENCODING_UCS4BE);
955
154
  if ((in[0] == 0x3C) && (in[1] == 0x00) &&
956
154
      (in[2] == 0x00) && (in[3] == 0x00))
957
0
      return(XML_CHAR_ENCODING_UCS4LE);
958
154
  if ((in[0] == 0x00) && (in[1] == 0x00) &&
959
154
      (in[2] == 0x3C) && (in[3] == 0x00))
960
0
      return(XML_CHAR_ENCODING_UCS4_2143);
961
154
  if ((in[0] == 0x00) && (in[1] == 0x3C) &&
962
154
      (in[2] == 0x00) && (in[3] == 0x00))
963
0
      return(XML_CHAR_ENCODING_UCS4_3412);
964
154
  if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
965
154
      (in[2] == 0xA7) && (in[3] == 0x94))
966
0
      return(XML_CHAR_ENCODING_EBCDIC);
967
154
  if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
968
154
      (in[2] == 0x78) && (in[3] == 0x6D))
969
0
      return(XML_CHAR_ENCODING_UTF8);
970
  /*
971
   * Although not part of the recommendation, we also
972
   * attempt an "auto-recognition" of UTF-16LE and
973
   * UTF-16BE encodings.
974
   */
975
154
  if ((in[0] == 0x3C) && (in[1] == 0x00) &&
976
154
      (in[2] == 0x3F) && (in[3] == 0x00))
977
22
      return(XML_CHAR_ENCODING_UTF16LE);
978
132
  if ((in[0] == 0x00) && (in[1] == 0x3C) &&
979
132
      (in[2] == 0x00) && (in[3] == 0x3F))
980
2
      return(XML_CHAR_ENCODING_UTF16BE);
981
132
    }
982
130
    if (len >= 3) {
983
  /*
984
   * Errata on XML-1.0 June 20 2001
985
   * We now allow an UTF8 encoded BOM
986
   */
987
130
  if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
988
130
      (in[2] == 0xBF))
989
0
      return(XML_CHAR_ENCODING_UTF8);
990
130
    }
991
    /* For UTF-16 we can recognize by the BOM */
992
130
    if (len >= 2) {
993
130
  if ((in[0] == 0xFE) && (in[1] == 0xFF))
994
32
      return(XML_CHAR_ENCODING_UTF16BE);
995
98
  if ((in[0] == 0xFF) && (in[1] == 0xFE))
996
4
      return(XML_CHAR_ENCODING_UTF16LE);
997
98
    }
998
94
    return(XML_CHAR_ENCODING_NONE);
999
130
}
1000
1001
/**
1002
 * xmlCleanupEncodingAliases:
1003
 *
1004
 * Unregisters all aliases
1005
 */
1006
void
1007
0
xmlCleanupEncodingAliases(void) {
1008
0
    int i;
1009
1010
0
    if (xmlCharEncodingAliases == NULL)
1011
0
  return;
1012
1013
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1014
0
  if (xmlCharEncodingAliases[i].name != NULL)
1015
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1016
0
  if (xmlCharEncodingAliases[i].alias != NULL)
1017
0
      xmlFree((char *) xmlCharEncodingAliases[i].alias);
1018
0
    }
1019
0
    xmlCharEncodingAliasesNb = 0;
1020
0
    xmlCharEncodingAliasesMax = 0;
1021
0
    xmlFree(xmlCharEncodingAliases);
1022
0
    xmlCharEncodingAliases = NULL;
1023
0
}
1024
1025
/**
1026
 * xmlGetEncodingAlias:
1027
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1028
 *
1029
 * Lookup an encoding name for the given alias.
1030
 *
1031
 * Returns NULL if not found, otherwise the original name
1032
 */
1033
const char *
1034
6
xmlGetEncodingAlias(const char *alias) {
1035
6
    int i;
1036
6
    char upper[100];
1037
1038
6
    if (alias == NULL)
1039
0
  return(NULL);
1040
1041
6
    if (xmlCharEncodingAliases == NULL)
1042
6
  return(NULL);
1043
1044
0
    for (i = 0;i < 99;i++) {
1045
0
        upper[i] = (char) toupper((unsigned char) alias[i]);
1046
0
  if (upper[i] == 0) break;
1047
0
    }
1048
0
    upper[i] = 0;
1049
1050
    /*
1051
     * Walk down the list looking for a definition of the alias
1052
     */
1053
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1054
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1055
0
      return(xmlCharEncodingAliases[i].name);
1056
0
  }
1057
0
    }
1058
0
    return(NULL);
1059
0
}
1060
1061
/**
1062
 * xmlAddEncodingAlias:
1063
 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1064
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1065
 *
1066
 * Registers an alias @alias for an encoding named @name. Existing alias
1067
 * will be overwritten.
1068
 *
1069
 * Returns 0 in case of success, -1 in case of error
1070
 */
1071
int
1072
0
xmlAddEncodingAlias(const char *name, const char *alias) {
1073
0
    int i;
1074
0
    char upper[100];
1075
1076
0
    if ((name == NULL) || (alias == NULL))
1077
0
  return(-1);
1078
1079
0
    for (i = 0;i < 99;i++) {
1080
0
        upper[i] = (char) toupper((unsigned char) alias[i]);
1081
0
  if (upper[i] == 0) break;
1082
0
    }
1083
0
    upper[i] = 0;
1084
1085
0
    if (xmlCharEncodingAliases == NULL) {
1086
0
  xmlCharEncodingAliasesNb = 0;
1087
0
  xmlCharEncodingAliasesMax = 20;
1088
0
  xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1089
0
        xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1090
0
  if (xmlCharEncodingAliases == NULL)
1091
0
      return(-1);
1092
0
    } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1093
0
  xmlCharEncodingAliasesMax *= 2;
1094
0
  xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1095
0
        xmlRealloc(xmlCharEncodingAliases,
1096
0
             xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1097
0
    }
1098
    /*
1099
     * Walk down the list looking for a definition of the alias
1100
     */
1101
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1102
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1103
      /*
1104
       * Replace the definition.
1105
       */
1106
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1107
0
      xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1108
0
      return(0);
1109
0
  }
1110
0
    }
1111
    /*
1112
     * Add the definition
1113
     */
1114
0
    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1115
0
    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1116
0
    xmlCharEncodingAliasesNb++;
1117
0
    return(0);
1118
0
}
1119
1120
/**
1121
 * xmlDelEncodingAlias:
1122
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1123
 *
1124
 * Unregisters an encoding alias @alias
1125
 *
1126
 * Returns 0 in case of success, -1 in case of error
1127
 */
1128
int
1129
0
xmlDelEncodingAlias(const char *alias) {
1130
0
    int i;
1131
1132
0
    if (alias == NULL)
1133
0
  return(-1);
1134
1135
0
    if (xmlCharEncodingAliases == NULL)
1136
0
  return(-1);
1137
    /*
1138
     * Walk down the list looking for a definition of the alias
1139
     */
1140
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1141
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1142
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1143
0
      xmlFree((char *) xmlCharEncodingAliases[i].alias);
1144
0
      xmlCharEncodingAliasesNb--;
1145
0
      memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1146
0
        sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1147
0
      return(0);
1148
0
  }
1149
0
    }
1150
0
    return(-1);
1151
0
}
1152
1153
/**
1154
 * xmlParseCharEncoding:
1155
 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1156
 *
1157
 * Compare the string to the encoding schemes already known. Note
1158
 * that the comparison is case insensitive accordingly to the section
1159
 * [XML] 4.3.3 Character Encoding in Entities.
1160
 *
1161
 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1162
 * if not recognized.
1163
 */
1164
xmlCharEncoding
1165
xmlParseCharEncoding(const char* name)
1166
2
{
1167
2
    const char *alias;
1168
2
    char upper[500];
1169
2
    int i;
1170
1171
2
    if (name == NULL)
1172
0
  return(XML_CHAR_ENCODING_NONE);
1173
1174
    /*
1175
     * Do the alias resolution
1176
     */
1177
2
    alias = xmlGetEncodingAlias(name);
1178
2
    if (alias != NULL)
1179
0
  name = alias;
1180
1181
32
    for (i = 0;i < 499;i++) {
1182
32
        upper[i] = (char) toupper((unsigned char) name[i]);
1183
32
  if (upper[i] == 0) break;
1184
32
    }
1185
2
    upper[i] = 0;
1186
1187
2
    if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1188
2
    if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1189
2
    if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1190
1191
    /*
1192
     * NOTE: if we were able to parse this, the endianness of UTF16 is
1193
     *       already found and in use
1194
     */
1195
2
    if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1196
2
    if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1197
1198
2
    if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1199
2
    if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1200
2
    if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1201
1202
    /*
1203
     * NOTE: if we were able to parse this, the endianness of UCS4 is
1204
     *       already found and in use
1205
     */
1206
2
    if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1207
0
    if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1208
0
    if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1209
1210
1211
0
    if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1212
0
    if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1213
0
    if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1214
1215
0
    if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1216
0
    if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1217
0
    if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1218
1219
0
    if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1220
0
    if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1221
0
    if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1222
0
    if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1223
0
    if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1224
0
    if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1225
0
    if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1226
1227
0
    if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1228
0
    if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1229
0
    if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1230
1231
#ifdef DEBUG_ENCODING
1232
    xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1233
#endif
1234
0
    return(XML_CHAR_ENCODING_ERROR);
1235
0
}
1236
1237
/**
1238
 * xmlGetCharEncodingName:
1239
 * @enc:  the encoding
1240
 *
1241
 * The "canonical" name for XML encoding.
1242
 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1243
 * Section 4.3.3  Character Encoding in Entities
1244
 *
1245
 * Returns the canonical name for the given encoding
1246
 */
1247
1248
const char*
1249
2
xmlGetCharEncodingName(xmlCharEncoding enc) {
1250
2
    switch (enc) {
1251
0
        case XML_CHAR_ENCODING_ERROR:
1252
0
      return(NULL);
1253
0
        case XML_CHAR_ENCODING_NONE:
1254
0
      return(NULL);
1255
0
        case XML_CHAR_ENCODING_UTF8:
1256
0
      return("UTF-8");
1257
0
        case XML_CHAR_ENCODING_UTF16LE:
1258
0
      return("UTF-16");
1259
0
        case XML_CHAR_ENCODING_UTF16BE:
1260
0
      return("UTF-16");
1261
0
        case XML_CHAR_ENCODING_EBCDIC:
1262
0
            return("EBCDIC");
1263
2
        case XML_CHAR_ENCODING_UCS4LE:
1264
2
            return("ISO-10646-UCS-4");
1265
0
        case XML_CHAR_ENCODING_UCS4BE:
1266
0
            return("ISO-10646-UCS-4");
1267
0
        case XML_CHAR_ENCODING_UCS4_2143:
1268
0
            return("ISO-10646-UCS-4");
1269
0
        case XML_CHAR_ENCODING_UCS4_3412:
1270
0
            return("ISO-10646-UCS-4");
1271
0
        case XML_CHAR_ENCODING_UCS2:
1272
0
            return("ISO-10646-UCS-2");
1273
0
        case XML_CHAR_ENCODING_8859_1:
1274
0
      return("ISO-8859-1");
1275
0
        case XML_CHAR_ENCODING_8859_2:
1276
0
      return("ISO-8859-2");
1277
0
        case XML_CHAR_ENCODING_8859_3:
1278
0
      return("ISO-8859-3");
1279
0
        case XML_CHAR_ENCODING_8859_4:
1280
0
      return("ISO-8859-4");
1281
0
        case XML_CHAR_ENCODING_8859_5:
1282
0
      return("ISO-8859-5");
1283
0
        case XML_CHAR_ENCODING_8859_6:
1284
0
      return("ISO-8859-6");
1285
0
        case XML_CHAR_ENCODING_8859_7:
1286
0
      return("ISO-8859-7");
1287
0
        case XML_CHAR_ENCODING_8859_8:
1288
0
      return("ISO-8859-8");
1289
0
        case XML_CHAR_ENCODING_8859_9:
1290
0
      return("ISO-8859-9");
1291
0
        case XML_CHAR_ENCODING_2022_JP:
1292
0
            return("ISO-2022-JP");
1293
0
        case XML_CHAR_ENCODING_SHIFT_JIS:
1294
0
            return("Shift-JIS");
1295
0
        case XML_CHAR_ENCODING_EUC_JP:
1296
0
            return("EUC-JP");
1297
0
  case XML_CHAR_ENCODING_ASCII:
1298
0
      return(NULL);
1299
2
    }
1300
0
    return(NULL);
1301
2
}
1302
1303
/************************************************************************
1304
 *                  *
1305
 *      Char encoding handlers        *
1306
 *                  *
1307
 ************************************************************************/
1308
1309
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) && \
    defined(LIBXML_ISO8859X_ENABLED)

/*
 * Forward declarations for the ISO-8859-x conversion routines. They are
 * declared only when neither iconv nor ICU support is compiled in and
 * LIBXML_ISO8859X_ENABLED is defined. The macro emits the prototypes for
 * both directions of one ISO-8859 part; it carries its own terminating
 * semicolon, so the invocations below take none.
 */
#define DECLARE_ISO_FUNCS(part) \
    static int ISO8859_##part##ToUTF8(unsigned char* out, int *outlen, \
                                      const unsigned char* in, int *inlen); \
    static int UTF8ToISO8859_##part(unsigned char* out, int *outlen, \
                                    const unsigned char* in, int *inlen);

/** DOC_DISABLE */
DECLARE_ISO_FUNCS(2)
DECLARE_ISO_FUNCS(3)
DECLARE_ISO_FUNCS(4)
DECLARE_ISO_FUNCS(5)
DECLARE_ISO_FUNCS(6)
DECLARE_ISO_FUNCS(7)
DECLARE_ISO_FUNCS(8)
DECLARE_ISO_FUNCS(9)
DECLARE_ISO_FUNCS(10)
DECLARE_ISO_FUNCS(11)
DECLARE_ISO_FUNCS(13)
DECLARE_ISO_FUNCS(14)
DECLARE_ISO_FUNCS(15)
DECLARE_ISO_FUNCS(16)
/** DOC_ENABLE */

#endif /* LIBXML_ISO8859X_ENABLED */
1336
1337
/*
 * Trailing initializers for the optional converter members of
 * xmlCharEncodingHandler. Each expands to nothing when the matching
 * backend is not compiled in.
 */
#ifdef LIBXML_ICONV_ENABLED
  #define EMPTY_ICONV , (iconv_t) 0, (iconv_t) 0
#else
  #define EMPTY_ICONV
#endif

/* Guarded by LIBXML_ICU_ENABLED, the macro used for ICU elsewhere in this file. */
#ifdef LIBXML_ICU_ENABLED
  #define EMPTY_UCONV , NULL, NULL
#else
  #define EMPTY_UCONV
#endif

/*
 * Brace initializer for one static handler entry: name, input function,
 * output function, followed by the empty iconv/ICU slots.
 */
#define MAKE_HANDLER(name, in, out) \
    { (char *) name, in, out EMPTY_ICONV EMPTY_UCONV }
1351
1352
static const xmlCharEncodingHandler defaultHandlers[] = {
1353
    MAKE_HANDLER("UTF-8", UTF8ToUTF8, UTF8ToUTF8)
1354
#ifdef LIBXML_OUTPUT_ENABLED
1355
    ,MAKE_HANDLER("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE)
1356
    ,MAKE_HANDLER("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE)
1357
    ,MAKE_HANDLER("UTF-16", UTF16LEToUTF8, UTF8ToUTF16)
1358
    ,MAKE_HANDLER("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1)
1359
    ,MAKE_HANDLER("ASCII", asciiToUTF8, UTF8Toascii)
1360
    ,MAKE_HANDLER("US-ASCII", asciiToUTF8, UTF8Toascii)
1361
#ifdef LIBXML_HTML_ENABLED
1362
    ,MAKE_HANDLER("HTML", NULL, UTF8ToHtml)
1363
#endif
1364
#else
1365
    ,MAKE_HANDLER("UTF-16LE", UTF16LEToUTF8, NULL)
1366
    ,MAKE_HANDLER("UTF-16BE", UTF16BEToUTF8, NULL)
1367
    ,MAKE_HANDLER("UTF-16", UTF16LEToUTF8, NULL)
1368
    ,MAKE_HANDLER("ISO-8859-1", isolat1ToUTF8, NULL)
1369
    ,MAKE_HANDLER("ASCII", asciiToUTF8, NULL)
1370
    ,MAKE_HANDLER("US-ASCII", asciiToUTF8, NULL)
1371
#endif /* LIBXML_OUTPUT_ENABLED */
1372
1373
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) && \
1374
    defined(LIBXML_ISO8859X_ENABLED)
1375
    ,MAKE_HANDLER("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2)
1376
    ,MAKE_HANDLER("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3)
1377
    ,MAKE_HANDLER("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4)
1378
    ,MAKE_HANDLER("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5)
1379
    ,MAKE_HANDLER("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6)
1380
    ,MAKE_HANDLER("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7)
1381
    ,MAKE_HANDLER("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8)
1382
    ,MAKE_HANDLER("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9)
1383
    ,MAKE_HANDLER("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10)
1384
    ,MAKE_HANDLER("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11)
1385
    ,MAKE_HANDLER("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13)
1386
    ,MAKE_HANDLER("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14)
1387
    ,MAKE_HANDLER("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15)
1388
    ,MAKE_HANDLER("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16)
1389
#endif
1390
};
1391
1392
#define NUM_DEFAULT_HANDLERS \
1393
208
    (sizeof(defaultHandlers) / sizeof(defaultHandlers[0]))
1394
1395
static const xmlCharEncodingHandler *xmlUTF16LEHandler = &defaultHandlers[1];
1396
static const xmlCharEncodingHandler *xmlUTF16BEHandler = &defaultHandlers[2];
1397
1398
/* the size should be growable, but it's not a big deal ... */
1399
0
#define MAX_ENCODING_HANDLERS 50
1400
static xmlCharEncodingHandlerPtr *handlers = NULL;
1401
static int nbCharEncodingHandler = 0;
1402
1403
/**
1404
 * xmlNewCharEncodingHandler:
1405
 * @name:  the encoding name, in UTF-8 format (ASCII actually)
1406
 * @input:  the xmlCharEncodingInputFunc to read that encoding
1407
 * @output:  the xmlCharEncodingOutputFunc to write that encoding
1408
 *
1409
 * Create and registers an xmlCharEncodingHandler.
1410
 *
1411
 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1412
 */
1413
xmlCharEncodingHandlerPtr
1414
xmlNewCharEncodingHandler(const char *name,
1415
                          xmlCharEncodingInputFunc input,
1416
0
                          xmlCharEncodingOutputFunc output) {
1417
0
    xmlCharEncodingHandlerPtr handler;
1418
0
    const char *alias;
1419
0
    char upper[500];
1420
0
    int i;
1421
0
    char *up = NULL;
1422
1423
    /*
1424
     * Do the alias resolution
1425
     */
1426
0
    alias = xmlGetEncodingAlias(name);
1427
0
    if (alias != NULL)
1428
0
  name = alias;
1429
1430
    /*
1431
     * Keep only the uppercase version of the encoding.
1432
     */
1433
0
    if (name == NULL) {
1434
0
        xmlEncodingErr(XML_I18N_NO_NAME,
1435
0
           "xmlNewCharEncodingHandler : no name !\n", NULL);
1436
0
  return(NULL);
1437
0
    }
1438
0
    for (i = 0;i < 499;i++) {
1439
0
        upper[i] = (char) toupper((unsigned char) name[i]);
1440
0
  if (upper[i] == 0) break;
1441
0
    }
1442
0
    upper[i] = 0;
1443
0
    up = xmlMemStrdup(upper);
1444
0
    if (up == NULL) {
1445
0
        xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1446
0
  return(NULL);
1447
0
    }
1448
1449
    /*
1450
     * allocate and fill-up an handler block.
1451
     */
1452
0
    handler = (xmlCharEncodingHandlerPtr)
1453
0
              xmlMalloc(sizeof(xmlCharEncodingHandler));
1454
0
    if (handler == NULL) {
1455
0
        xmlFree(up);
1456
0
        xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1457
0
  return(NULL);
1458
0
    }
1459
0
    memset(handler, 0, sizeof(xmlCharEncodingHandler));
1460
0
    handler->input = input;
1461
0
    handler->output = output;
1462
0
    handler->name = up;
1463
1464
0
#ifdef LIBXML_ICONV_ENABLED
1465
0
    handler->iconv_in = NULL;
1466
0
    handler->iconv_out = NULL;
1467
0
#endif
1468
#ifdef LIBXML_ICU_ENABLED
1469
    handler->uconv_in = NULL;
1470
    handler->uconv_out = NULL;
1471
#endif
1472
1473
    /*
1474
     * registers and returns the handler.
1475
     */
1476
0
    xmlRegisterCharEncodingHandler(handler);
1477
#ifdef DEBUG_ENCODING
1478
    xmlGenericError(xmlGenericErrorContext,
1479
      "Registered encoding handler for %s\n", name);
1480
#endif
1481
0
    return(handler);
1482
0
}
1483
1484
/**
 * xmlInitCharEncodingHandlers:
 *
 * DEPRECATED: Alias for xmlInitParser.
 *
 * Kept as a thin wrapper: all the work is delegated to xmlInitParser().
 */
void
xmlInitCharEncodingHandlers(void)
{
    xmlInitParser();
}
1493
1494
/**
1495
 * xmlInitEncodingInternal:
1496
 *
1497
 * Initialize the char encoding support.
1498
 */
1499
void
1500
2
xmlInitEncodingInternal(void) {
1501
2
    unsigned short int tst = 0x1234;
1502
2
    unsigned char *ptr = (unsigned char *) &tst;
1503
1504
2
    if (*ptr == 0x12) xmlLittleEndian = 0;
1505
2
    else if (*ptr == 0x34) xmlLittleEndian = 1;
1506
0
    else {
1507
0
        xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1508
0
                 "Odd problem at endianness detection\n", NULL);
1509
0
    }
1510
2
}
1511
1512
/**
1513
 * xmlCleanupCharEncodingHandlers:
1514
 *
1515
 * DEPRECATED: This function will be made private. Call xmlCleanupParser
1516
 * to free global state but see the warnings there. xmlCleanupParser
1517
 * should be only called once at program exit. In most cases, you don't
1518
 * have call cleanup functions at all.
1519
 *
1520
 * Cleanup the memory allocated for the char encoding support, it
1521
 * unregisters all the encoding handlers and the aliases.
1522
 */
1523
void
1524
0
xmlCleanupCharEncodingHandlers(void) {
1525
0
    xmlCleanupEncodingAliases();
1526
1527
0
    if (handlers == NULL) return;
1528
1529
0
    for (;nbCharEncodingHandler > 0;) {
1530
0
        nbCharEncodingHandler--;
1531
0
  if (handlers[nbCharEncodingHandler] != NULL) {
1532
0
      if (handlers[nbCharEncodingHandler]->name != NULL)
1533
0
    xmlFree(handlers[nbCharEncodingHandler]->name);
1534
0
      xmlFree(handlers[nbCharEncodingHandler]);
1535
0
  }
1536
0
    }
1537
0
    xmlFree(handlers);
1538
0
    handlers = NULL;
1539
0
    nbCharEncodingHandler = 0;
1540
0
}
1541
1542
/**
1543
 * xmlRegisterCharEncodingHandler:
1544
 * @handler:  the xmlCharEncodingHandlerPtr handler block
1545
 *
1546
 * Register the char encoding handler, surprising, isn't it ?
1547
 */
1548
void
1549
0
xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1550
0
    if (handler == NULL) {
1551
0
        xmlEncodingErr(XML_I18N_NO_HANDLER,
1552
0
    "xmlRegisterCharEncodingHandler: NULL handler\n", NULL);
1553
0
        return;
1554
0
    }
1555
0
    if (handlers == NULL) {
1556
0
        handlers = xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(handlers[0]));
1557
0
        if (handlers == NULL) {
1558
0
            xmlEncodingErrMemory("allocating handler table");
1559
0
            goto free_handler;
1560
0
        }
1561
0
    }
1562
1563
0
    if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1564
0
        xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1565
0
  "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1566
0
                 "MAX_ENCODING_HANDLERS");
1567
0
        goto free_handler;
1568
0
    }
1569
0
    handlers[nbCharEncodingHandler++] = handler;
1570
0
    return;
1571
1572
0
free_handler:
1573
0
    if (handler != NULL) {
1574
0
        if (handler->name != NULL) {
1575
0
            xmlFree(handler->name);
1576
0
        }
1577
0
        xmlFree(handler);
1578
0
    }
1579
0
}
1580
1581
/**
1582
 * xmlGetCharEncodingHandler:
1583
 * @enc:  an xmlCharEncoding value.
1584
 *
1585
 * Search in the registered set the handler able to read/write that encoding.
1586
 *
1587
 * Returns the handler or NULL if not found
1588
 */
1589
xmlCharEncodingHandlerPtr
1590
219
xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1591
219
    xmlCharEncodingHandlerPtr handler;
1592
1593
219
    switch (enc) {
1594
0
        case XML_CHAR_ENCODING_ERROR:
1595
0
      return(NULL);
1596
157
        case XML_CHAR_ENCODING_NONE:
1597
157
      return(NULL);
1598
0
        case XML_CHAR_ENCODING_UTF8:
1599
0
      return(NULL);
1600
26
        case XML_CHAR_ENCODING_UTF16LE:
1601
26
      return((xmlCharEncodingHandlerPtr) xmlUTF16LEHandler);
1602
34
        case XML_CHAR_ENCODING_UTF16BE:
1603
34
      return((xmlCharEncodingHandlerPtr) xmlUTF16BEHandler);
1604
0
        case XML_CHAR_ENCODING_EBCDIC:
1605
0
            handler = xmlFindCharEncodingHandler("EBCDIC");
1606
0
            if (handler != NULL) return(handler);
1607
0
            handler = xmlFindCharEncodingHandler("ebcdic");
1608
0
            if (handler != NULL) return(handler);
1609
0
            handler = xmlFindCharEncodingHandler("EBCDIC-US");
1610
0
            if (handler != NULL) return(handler);
1611
0
            handler = xmlFindCharEncodingHandler("IBM-037");
1612
0
            if (handler != NULL) return(handler);
1613
0
      break;
1614
2
        case XML_CHAR_ENCODING_UCS4BE:
1615
2
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1616
2
            if (handler != NULL) return(handler);
1617
2
            handler = xmlFindCharEncodingHandler("UCS-4");
1618
2
            if (handler != NULL) return(handler);
1619
0
            handler = xmlFindCharEncodingHandler("UCS4");
1620
0
            if (handler != NULL) return(handler);
1621
0
      break;
1622
0
        case XML_CHAR_ENCODING_UCS4LE:
1623
0
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1624
0
            if (handler != NULL) return(handler);
1625
0
            handler = xmlFindCharEncodingHandler("UCS-4");
1626
0
            if (handler != NULL) return(handler);
1627
0
            handler = xmlFindCharEncodingHandler("UCS4");
1628
0
            if (handler != NULL) return(handler);
1629
0
      break;
1630
0
        case XML_CHAR_ENCODING_UCS4_2143:
1631
0
      break;
1632
0
        case XML_CHAR_ENCODING_UCS4_3412:
1633
0
      break;
1634
0
        case XML_CHAR_ENCODING_UCS2:
1635
0
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1636
0
            if (handler != NULL) return(handler);
1637
0
            handler = xmlFindCharEncodingHandler("UCS-2");
1638
0
            if (handler != NULL) return(handler);
1639
0
            handler = xmlFindCharEncodingHandler("UCS2");
1640
0
            if (handler != NULL) return(handler);
1641
0
      break;
1642
1643
      /*
1644
       * We used to keep ISO Latin encodings native in the
1645
       * generated data. This led to so many problems that
1646
       * this has been removed. One can still change this
1647
       * back by registering no-ops encoders for those
1648
       */
1649
0
        case XML_CHAR_ENCODING_8859_1:
1650
0
      handler = xmlFindCharEncodingHandler("ISO-8859-1");
1651
0
      if (handler != NULL) return(handler);
1652
0
      break;
1653
0
        case XML_CHAR_ENCODING_8859_2:
1654
0
      handler = xmlFindCharEncodingHandler("ISO-8859-2");
1655
0
      if (handler != NULL) return(handler);
1656
0
      break;
1657
0
        case XML_CHAR_ENCODING_8859_3:
1658
0
      handler = xmlFindCharEncodingHandler("ISO-8859-3");
1659
0
      if (handler != NULL) return(handler);
1660
0
      break;
1661
0
        case XML_CHAR_ENCODING_8859_4:
1662
0
      handler = xmlFindCharEncodingHandler("ISO-8859-4");
1663
0
      if (handler != NULL) return(handler);
1664
0
      break;
1665
0
        case XML_CHAR_ENCODING_8859_5:
1666
0
      handler = xmlFindCharEncodingHandler("ISO-8859-5");
1667
0
      if (handler != NULL) return(handler);
1668
0
      break;
1669
0
        case XML_CHAR_ENCODING_8859_6:
1670
0
      handler = xmlFindCharEncodingHandler("ISO-8859-6");
1671
0
      if (handler != NULL) return(handler);
1672
0
      break;
1673
0
        case XML_CHAR_ENCODING_8859_7:
1674
0
      handler = xmlFindCharEncodingHandler("ISO-8859-7");
1675
0
      if (handler != NULL) return(handler);
1676
0
      break;
1677
0
        case XML_CHAR_ENCODING_8859_8:
1678
0
      handler = xmlFindCharEncodingHandler("ISO-8859-8");
1679
0
      if (handler != NULL) return(handler);
1680
0
      break;
1681
0
        case XML_CHAR_ENCODING_8859_9:
1682
0
      handler = xmlFindCharEncodingHandler("ISO-8859-9");
1683
0
      if (handler != NULL) return(handler);
1684
0
      break;
1685
1686
1687
0
        case XML_CHAR_ENCODING_2022_JP:
1688
0
            handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1689
0
            if (handler != NULL) return(handler);
1690
0
      break;
1691
0
        case XML_CHAR_ENCODING_SHIFT_JIS:
1692
0
            handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1693
0
            if (handler != NULL) return(handler);
1694
0
            handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1695
0
            if (handler != NULL) return(handler);
1696
0
            handler = xmlFindCharEncodingHandler("Shift_JIS");
1697
0
            if (handler != NULL) return(handler);
1698
0
      break;
1699
0
        case XML_CHAR_ENCODING_EUC_JP:
1700
0
            handler = xmlFindCharEncodingHandler("EUC-JP");
1701
0
            if (handler != NULL) return(handler);
1702
0
      break;
1703
0
  default:
1704
0
      break;
1705
219
    }
1706
1707
#ifdef DEBUG_ENCODING
1708
    xmlGenericError(xmlGenericErrorContext,
1709
      "No handler found for encoding %d\n", enc);
1710
#endif
1711
0
    return(NULL);
1712
219
}
1713
1714
/**
1715
 * xmlFindCharEncodingHandler:
1716
 * @name:  a string describing the char encoding.
1717
 *
1718
 * Search in the registered set the handler able to read/write that encoding
1719
 * or create a new one.
1720
 *
1721
 * Returns the handler or NULL if not found
1722
 */
1723
xmlCharEncodingHandlerPtr
1724
4
xmlFindCharEncodingHandler(const char *name) {
1725
4
    const char *nalias;
1726
4
    const char *norig;
1727
4
    xmlCharEncoding alias;
1728
4
#ifdef LIBXML_ICONV_ENABLED
1729
4
    xmlCharEncodingHandlerPtr enc;
1730
4
    iconv_t icv_in, icv_out;
1731
4
#endif /* LIBXML_ICONV_ENABLED */
1732
#ifdef LIBXML_ICU_ENABLED
1733
    xmlCharEncodingHandlerPtr encu;
1734
    uconv_t *ucv_in, *ucv_out;
1735
#endif /* LIBXML_ICU_ENABLED */
1736
4
    char upper[100];
1737
4
    int i;
1738
1739
4
    if (name == NULL) return(NULL);
1740
4
    if (name[0] == 0) return(NULL);
1741
1742
    /*
1743
     * Do the alias resolution
1744
     */
1745
4
    norig = name;
1746
4
    nalias = xmlGetEncodingAlias(name);
1747
4
    if (nalias != NULL)
1748
0
  name = nalias;
1749
1750
    /*
1751
     * Check first for directly registered encoding names
1752
     */
1753
44
    for (i = 0;i < 99;i++) {
1754
44
        upper[i] = (char) toupper((unsigned char) name[i]);
1755
44
  if (upper[i] == 0) break;
1756
44
    }
1757
4
    upper[i] = 0;
1758
1759
36
    for (i = 0; i < (int) NUM_DEFAULT_HANDLERS; i++) {
1760
32
        if (strcmp(upper, defaultHandlers[i].name) == 0)
1761
0
            return((xmlCharEncodingHandlerPtr) &defaultHandlers[i]);
1762
32
    }
1763
1764
4
    if (handlers != NULL) {
1765
0
        for (i = 0;i < nbCharEncodingHandler; i++) {
1766
0
            if (!strcmp(upper, handlers[i]->name)) {
1767
#ifdef DEBUG_ENCODING
1768
                xmlGenericError(xmlGenericErrorContext,
1769
                        "Found registered handler for encoding %s\n", name);
1770
#endif
1771
0
                return(handlers[i]);
1772
0
            }
1773
0
        }
1774
0
    }
1775
1776
4
#ifdef LIBXML_ICONV_ENABLED
1777
    /* check whether iconv can handle this */
1778
4
    icv_in = iconv_open("UTF-8", name);
1779
4
    icv_out = iconv_open(name, "UTF-8");
1780
4
    if (icv_in == (iconv_t) -1) {
1781
2
        icv_in = iconv_open("UTF-8", upper);
1782
2
    }
1783
4
    if (icv_out == (iconv_t) -1) {
1784
2
  icv_out = iconv_open(upper, "UTF-8");
1785
2
    }
1786
4
    if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1787
2
      enc = (xmlCharEncodingHandlerPtr)
1788
2
            xmlMalloc(sizeof(xmlCharEncodingHandler));
1789
2
      if (enc == NULL) {
1790
0
          iconv_close(icv_in);
1791
0
          iconv_close(icv_out);
1792
0
    return(NULL);
1793
0
      }
1794
2
            memset(enc, 0, sizeof(xmlCharEncodingHandler));
1795
2
      enc->name = xmlMemStrdup(name);
1796
2
            if (enc->name == NULL) {
1797
0
                xmlFree(enc);
1798
0
                iconv_close(icv_in);
1799
0
                iconv_close(icv_out);
1800
0
                return(NULL);
1801
0
            }
1802
2
      enc->input = NULL;
1803
2
      enc->output = NULL;
1804
2
      enc->iconv_in = icv_in;
1805
2
      enc->iconv_out = icv_out;
1806
#ifdef DEBUG_ENCODING
1807
            xmlGenericError(xmlGenericErrorContext,
1808
        "Found iconv handler for encoding %s\n", name);
1809
#endif
1810
2
      return enc;
1811
2
    } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1812
0
      xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1813
0
        "iconv : problems with filters for '%s'\n", name);
1814
0
      if (icv_in != (iconv_t) -1)
1815
0
    iconv_close(icv_in);
1816
0
      else
1817
0
    iconv_close(icv_out);
1818
0
    }
1819
2
#endif /* LIBXML_ICONV_ENABLED */
1820
#ifdef LIBXML_ICU_ENABLED
1821
    /* check whether icu can handle this */
1822
    ucv_in = openIcuConverter(name, 1);
1823
    ucv_out = openIcuConverter(name, 0);
1824
    if (ucv_in != NULL && ucv_out != NULL) {
1825
      encu = (xmlCharEncodingHandlerPtr)
1826
             xmlMalloc(sizeof(xmlCharEncodingHandler));
1827
      if (encu == NULL) {
1828
                closeIcuConverter(ucv_in);
1829
                closeIcuConverter(ucv_out);
1830
    return(NULL);
1831
      }
1832
            memset(encu, 0, sizeof(xmlCharEncodingHandler));
1833
      encu->name = xmlMemStrdup(name);
1834
            if (encu->name == NULL) {
1835
                xmlFree(encu);
1836
                closeIcuConverter(ucv_in);
1837
                closeIcuConverter(ucv_out);
1838
                return(NULL);
1839
            }
1840
      encu->input = NULL;
1841
      encu->output = NULL;
1842
      encu->uconv_in = ucv_in;
1843
      encu->uconv_out = ucv_out;
1844
#ifdef DEBUG_ENCODING
1845
            xmlGenericError(xmlGenericErrorContext,
1846
        "Found ICU converter handler for encoding %s\n", name);
1847
#endif
1848
      return encu;
1849
    } else if (ucv_in != NULL || ucv_out != NULL) {
1850
            closeIcuConverter(ucv_in);
1851
            closeIcuConverter(ucv_out);
1852
      xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1853
        "ICU converter : problems with filters for '%s'\n", name);
1854
    }
1855
#endif /* LIBXML_ICU_ENABLED */
1856
1857
#ifdef DEBUG_ENCODING
1858
    xmlGenericError(xmlGenericErrorContext,
1859
      "No handler found for encoding %s\n", name);
1860
#endif
1861
1862
    /*
1863
     * Fallback using the canonical names
1864
     */
1865
2
    alias = xmlParseCharEncoding(norig);
1866
2
    if (alias != XML_CHAR_ENCODING_ERROR) {
1867
2
        const char* canon;
1868
2
        canon = xmlGetCharEncodingName(alias);
1869
2
        if ((canon != NULL) && (strcmp(name, canon))) {
1870
0
      return(xmlFindCharEncodingHandler(canon));
1871
0
        }
1872
2
    }
1873
1874
    /* If "none of the above", give up */
1875
2
    return(NULL);
1876
2
}
1877
1878
/************************************************************************
1879
 *                  *
1880
 *    ICONV based generic conversion functions    *
1881
 *                  *
1882
 ************************************************************************/
1883
1884
#ifdef LIBXML_ICONV_ENABLED
1885
/**
 * xmlIconvWrapper:
 * @cd:   iconv converter data structure
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of input bytes
 * @inlen:  the length of @in
 *
 * Run a single iconv() call over the given buffers and translate its
 * outcome into the libxml2 conversion return codes.
 *
 * Returns 0 if success, or
 *     -1 by lack of space (E2BIG) or if one of the pointer arguments
 *        is NULL, or
 *     -2 if the transcoding fails (EILSEQ: the input is not valid in
 *        the source encoding or can't be represented in the target), or
 *     -3 if the input ends with an incomplete sequence (EINVAL), on any
 *        other iconv error, or if input is left over without an error.
 *
 * When the arguments are valid, the value of @inlen after return is the
 * number of octets consumed and the value of @outlen is the number of
 * octets produced. If an argument is NULL, @outlen is set to 0 (when it
 * is available) and @inlen is left untouched.
 */
static int
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    size_t icv_inlen, icv_outlen;
    const char *icv_in = (const char *) in;
    char *icv_out = (char *) out;
    size_t ret;
    int err = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
        if (outlen != NULL) *outlen = 0;
        return(-1);
    }
    icv_inlen = *inlen;
    icv_outlen = *outlen;
    /*
     * Some versions take const, other versions take non-const input.
     */
    ret = iconv(cd, (void *) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
    /*
     * errno is only meaningful when iconv() reported a failure; capture
     * it right away so that a stale value is never interpreted.
     */
    if (ret == (size_t) -1)
        err = errno;
    *inlen -= icv_inlen;
    *outlen -= icv_outlen;

    if (ret == (size_t) -1) {
#ifdef EILSEQ
        if (err == EILSEQ)
            return -2;
#endif
#ifdef E2BIG
        if (err == E2BIG)
            return -1;
#endif
#ifdef EINVAL
        if (err == EINVAL)
            return -3;
#endif
        return -3;
    }
    /* no error reported but input left over: treat as incomplete */
    if (icv_inlen != 0)
        return -3;
    return 0;
}
1945
#endif /* LIBXML_ICONV_ENABLED */
1946
1947
/************************************************************************
1948
 *                  *
1949
 *    ICU based generic conversion functions    *
1950
 *                  *
1951
 ************************************************************************/
1952
1953
#ifdef LIBXML_ICU_ENABLED
1954
/**
1955
 * xmlUconvWrapper:
1956
 * @cd: ICU uconverter data structure
1957
 * @toUnicode : non-zero if toUnicode. 0 otherwise.
1958
 * @out:  a pointer to an array of bytes to store the result
1959
 * @outlen:  the length of @out
1960
 * @in:  a pointer to an array of input bytes
1961
 * @inlen:  the length of @in
1962
 * @flush: if true, indicates end of input
1963
 *
1964
 * Returns 0 if success, or
1965
 *     -1 by lack of space, or
1966
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
1967
 *        the result of transformation can't fit into the encoding we want), or
1968
 *     -3 if there the last byte can't form a single output char.
1969
 *
1970
 * The value of @inlen after return is the number of octets consumed
1971
 *     as the return value is positive, else unpredictable.
1972
 * The value of @outlen after return is the number of octets produced.
1973
 */
1974
static int
1975
xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
1976
                const unsigned char *in, int *inlen, int flush) {
1977
    const char *ucv_in = (const char *) in;
1978
    char *ucv_out = (char *) out;
1979
    UErrorCode err = U_ZERO_ERROR;
1980
1981
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1982
        if (outlen != NULL) *outlen = 0;
1983
        return(-1);
1984
    }
1985
1986
    if (toUnicode) {
1987
        /* encoding => UTF-16 => UTF-8 */
1988
        ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
1989
                       &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1990
                       &cd->pivot_source, &cd->pivot_target,
1991
                       cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1992
    } else {
1993
        /* UTF-8 => UTF-16 => encoding */
1994
        ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
1995
                       &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1996
                       &cd->pivot_source, &cd->pivot_target,
1997
                       cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1998
    }
1999
    *inlen = ucv_in - (const char*) in;
2000
    *outlen = ucv_out - (char *) out;
2001
    if (U_SUCCESS(err)) {
2002
        /* reset pivot buf if this is the last call for input (flush==TRUE) */
2003
        if (flush)
2004
            cd->pivot_source = cd->pivot_target = cd->pivot_buf;
2005
        return 0;
2006
    }
2007
    if (err == U_BUFFER_OVERFLOW_ERROR)
2008
        return -1;
2009
    if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
2010
        return -2;
2011
    return -3;
2012
}
2013
#endif /* LIBXML_ICU_ENABLED */
2014
2015
/************************************************************************
2016
 *                  *
2017
 *    The real API used by libxml for on-the-fly conversion *
2018
 *                  *
2019
 ************************************************************************/
2020
2021
/**
2022
 * xmlEncInputChunk:
2023
 * @handler:  encoding handler
2024
 * @out:  a pointer to an array of bytes to store the result
2025
 * @outlen:  the length of @out
2026
 * @in:  a pointer to an array of input bytes
2027
 * @inlen:  the length of @in
2028
 * @flush:  flush (ICU-related)
2029
 *
2030
 * Returns 0 if success, or
2031
 *     -1 by lack of space, or
2032
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2033
 *        the result of transformation can't fit into the encoding we want), or
2034
 *     -3 if there the last byte can't form a single output char.
2035
 *
2036
 * The value of @inlen after return is the number of octets consumed
2037
 *     as the return value is 0, else unpredictable.
2038
 * The value of @outlen after return is the number of octets produced.
2039
 */
2040
int
2041
xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2042
1.48k
                 int *outlen, const unsigned char *in, int *inlen, int flush) {
2043
1.48k
    int ret;
2044
1.48k
    (void)flush;
2045
2046
1.48k
    if (handler->input != NULL) {
2047
1.48k
        ret = handler->input(out, outlen, in, inlen);
2048
1.48k
        if (ret > 0)
2049
103
           ret = 0;
2050
1.48k
    }
2051
4
#ifdef LIBXML_ICONV_ENABLED
2052
4
    else if (handler->iconv_in != NULL) {
2053
4
        ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
2054
4
    }
2055
0
#endif /* LIBXML_ICONV_ENABLED */
2056
#ifdef LIBXML_ICU_ENABLED
2057
    else if (handler->uconv_in != NULL) {
2058
        ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen,
2059
                              flush);
2060
    }
2061
#endif /* LIBXML_ICU_ENABLED */
2062
0
    else {
2063
0
        *outlen = 0;
2064
0
        *inlen = 0;
2065
0
        ret = -2;
2066
0
    }
2067
2068
1.48k
    return(ret);
2069
1.48k
}
2070
2071
/**
2072
 * xmlEncOutputChunk:
2073
 * @handler:  encoding handler
2074
 * @out:  a pointer to an array of bytes to store the result
2075
 * @outlen:  the length of @out
2076
 * @in:  a pointer to an array of input bytes
2077
 * @inlen:  the length of @in
2078
 *
2079
 * Returns 0 if success, or
2080
 *     -1 by lack of space, or
2081
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2082
 *        the result of transformation can't fit into the encoding we want), or
2083
 *     -3 if there the last byte can't form a single output char.
2084
 *     -4 if no output function was found.
2085
 *
2086
 * The value of @inlen after return is the number of octets consumed
2087
 *     as the return value is 0, else unpredictable.
2088
 * The value of @outlen after return is the number of octets produced.
2089
 */
2090
static int
2091
xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2092
0
                  int *outlen, const unsigned char *in, int *inlen) {
2093
0
    int ret;
2094
2095
0
    if (handler->output != NULL) {
2096
0
        ret = handler->output(out, outlen, in, inlen);
2097
0
        if (ret > 0)
2098
0
           ret = 0;
2099
0
    }
2100
0
#ifdef LIBXML_ICONV_ENABLED
2101
0
    else if (handler->iconv_out != NULL) {
2102
0
        ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
2103
0
    }
2104
0
#endif /* LIBXML_ICONV_ENABLED */
2105
#ifdef LIBXML_ICU_ENABLED
2106
    else if (handler->uconv_out != NULL) {
2107
        ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen,
2108
                              1);
2109
    }
2110
#endif /* LIBXML_ICU_ENABLED */
2111
0
    else {
2112
0
        *outlen = 0;
2113
0
        *inlen = 0;
2114
0
        ret = -4;
2115
0
    }
2116
2117
0
    return(ret);
2118
0
}
2119
2120
/**
2121
 * xmlCharEncFirstLine:
2122
 * @handler:  char encoding transformation data structure
2123
 * @out:  an xmlBuffer for the output.
2124
 * @in:  an xmlBuffer for the input
2125
 *
2126
 * DEPERECATED: Don't use.
2127
 */
2128
int
2129
xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2130
0
                    xmlBufferPtr in) {
2131
0
    return(xmlCharEncInFunc(handler, out, in));
2132
0
}
2133
2134
/**
2135
 * xmlCharEncInput:
2136
 * @input: a parser input buffer
2137
 * @flush: try to flush all the raw buffer
2138
 *
2139
 * Generic front-end for the encoding handler on parser input
2140
 *
2141
 * Returns the number of byte written if success, or
2142
 *     -1 general error
2143
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2144
 *        the result of transformation can't fit into the encoding we want), or
2145
 */
2146
int
2147
xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
2148
3.42k
{
2149
3.42k
    int ret;
2150
3.42k
    size_t written;
2151
3.42k
    size_t toconv;
2152
3.42k
    int c_in;
2153
3.42k
    int c_out;
2154
3.42k
    xmlBufPtr in;
2155
3.42k
    xmlBufPtr out;
2156
2157
3.42k
    if ((input == NULL) || (input->encoder == NULL) ||
2158
3.42k
        (input->buffer == NULL) || (input->raw == NULL))
2159
0
        return (-1);
2160
3.42k
    out = input->buffer;
2161
3.42k
    in = input->raw;
2162
2163
3.42k
    toconv = xmlBufUse(in);
2164
3.42k
    if (toconv == 0)
2165
1.93k
        return (0);
2166
1.48k
    if ((toconv > 64 * 1024) && (flush == 0))
2167
60
        toconv = 64 * 1024;
2168
1.48k
    written = xmlBufAvail(out);
2169
1.48k
    if (toconv * 2 >= written) {
2170
117
        if (xmlBufGrow(out, toconv * 2) < 0)
2171
0
            return (-1);
2172
117
        written = xmlBufAvail(out);
2173
117
    }
2174
1.48k
    if ((written > 128 * 1024) && (flush == 0))
2175
60
        written = 128 * 1024;
2176
2177
1.48k
    c_in = toconv;
2178
1.48k
    c_out = written;
2179
1.48k
    ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2180
1.48k
                           xmlBufContent(in), &c_in, flush);
2181
1.48k
    xmlBufShrink(in, c_in);
2182
1.48k
    xmlBufAddLen(out, c_out);
2183
1.48k
    if (ret == -1)
2184
0
        ret = -3;
2185
2186
1.48k
    switch (ret) {
2187
1.47k
        case 0:
2188
#ifdef DEBUG_ENCODING
2189
            xmlGenericError(xmlGenericErrorContext,
2190
                            "converted %d bytes to %d bytes of input\n",
2191
                            c_in, c_out);
2192
#endif
2193
1.47k
            break;
2194
0
        case -1:
2195
#ifdef DEBUG_ENCODING
2196
            xmlGenericError(xmlGenericErrorContext,
2197
                         "converted %d bytes to %d bytes of input, %d left\n",
2198
                            c_in, c_out, (int)xmlBufUse(in));
2199
#endif
2200
0
            break;
2201
0
        case -3:
2202
#ifdef DEBUG_ENCODING
2203
            xmlGenericError(xmlGenericErrorContext,
2204
                        "converted %d bytes to %d bytes of input, %d left\n",
2205
                            c_in, c_out, (int)xmlBufUse(in));
2206
#endif
2207
0
            break;
2208
16
        case -2: {
2209
16
            char buf[50];
2210
16
            const xmlChar *content = xmlBufContent(in);
2211
2212
16
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2213
16
         content[0], content[1],
2214
16
         content[2], content[3]);
2215
16
      buf[49] = 0;
2216
16
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2217
16
        "input conversion failed due to input error, bytes %s\n",
2218
16
               buf);
2219
16
        }
2220
1.48k
    }
2221
    /*
2222
     * Ignore when input buffer is not on a boundary
2223
     */
2224
1.48k
    if (ret == -3)
2225
0
        ret = 0;
2226
1.48k
    return (c_out? c_out : ret);
2227
1.48k
}
2228
2229
/**
2230
 * xmlCharEncInFunc:
2231
 * @handler:  char encoding transformation data structure
2232
 * @out:  an xmlBuffer for the output.
2233
 * @in:  an xmlBuffer for the input
2234
 *
2235
 * Generic front-end for the encoding handler input function
2236
 *
2237
 * Returns the number of byte written if success, or
2238
 *     -1 general error
2239
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2240
 *        the result of transformation can't fit into the encoding we want), or
2241
 */
2242
int
2243
xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2244
                 xmlBufferPtr in)
2245
0
{
2246
0
    int ret;
2247
0
    int written;
2248
0
    int toconv;
2249
2250
0
    if (handler == NULL)
2251
0
        return (-1);
2252
0
    if (out == NULL)
2253
0
        return (-1);
2254
0
    if (in == NULL)
2255
0
        return (-1);
2256
2257
0
    toconv = in->use;
2258
0
    if (toconv == 0)
2259
0
        return (0);
2260
0
    written = out->size - out->use -1; /* count '\0' */
2261
0
    if (toconv * 2 >= written) {
2262
0
        xmlBufferGrow(out, out->size + toconv * 2);
2263
0
        written = out->size - out->use - 1;
2264
0
    }
2265
0
    ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2266
0
                           in->content, &toconv, 1);
2267
0
    xmlBufferShrink(in, toconv);
2268
0
    out->use += written;
2269
0
    out->content[out->use] = 0;
2270
0
    if (ret == -1)
2271
0
        ret = -3;
2272
2273
0
    switch (ret) {
2274
0
        case 0:
2275
#ifdef DEBUG_ENCODING
2276
            xmlGenericError(xmlGenericErrorContext,
2277
                            "converted %d bytes to %d bytes of input\n",
2278
                            toconv, written);
2279
#endif
2280
0
            break;
2281
0
        case -1:
2282
#ifdef DEBUG_ENCODING
2283
            xmlGenericError(xmlGenericErrorContext,
2284
                         "converted %d bytes to %d bytes of input, %d left\n",
2285
                            toconv, written, in->use);
2286
#endif
2287
0
            break;
2288
0
        case -3:
2289
#ifdef DEBUG_ENCODING
2290
            xmlGenericError(xmlGenericErrorContext,
2291
                        "converted %d bytes to %d bytes of input, %d left\n",
2292
                            toconv, written, in->use);
2293
#endif
2294
0
            break;
2295
0
        case -2: {
2296
0
            char buf[50];
2297
2298
0
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2299
0
         in->content[0], in->content[1],
2300
0
         in->content[2], in->content[3]);
2301
0
      buf[49] = 0;
2302
0
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2303
0
        "input conversion failed due to input error, bytes %s\n",
2304
0
               buf);
2305
0
        }
2306
0
    }
2307
    /*
2308
     * Ignore when input buffer is not on a boundary
2309
     */
2310
0
    if (ret == -3)
2311
0
        ret = 0;
2312
0
    return (written? written : ret);
2313
0
}
2314
2315
#ifdef LIBXML_OUTPUT_ENABLED
2316
/**
2317
 * xmlCharEncOutput:
2318
 * @output: a parser output buffer
2319
 * @init: is this an initialization call without data
2320
 *
2321
 * Generic front-end for the encoding handler on parser output
2322
 * a first call with @init == 1 has to be made first to initiate the
2323
 * output in case of non-stateless encoding needing to initiate their
2324
 * state or the output (like the BOM in UTF16).
2325
 * In case of UTF8 sequence conversion errors for the given encoder,
2326
 * the content will be automatically remapped to a CharRef sequence.
2327
 *
2328
 * Returns the number of byte written if success, or
2329
 *     -1 general error
2330
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2331
 *        the result of transformation can't fit into the encoding we want), or
2332
 */
2333
int
2334
xmlCharEncOutput(xmlOutputBufferPtr output, int init)
2335
0
{
2336
0
    int ret;
2337
0
    size_t written;
2338
0
    int writtentot = 0;
2339
0
    size_t toconv;
2340
0
    int c_in;
2341
0
    int c_out;
2342
0
    xmlBufPtr in;
2343
0
    xmlBufPtr out;
2344
2345
0
    if ((output == NULL) || (output->encoder == NULL) ||
2346
0
        (output->buffer == NULL) || (output->conv == NULL))
2347
0
        return (-1);
2348
0
    out = output->conv;
2349
0
    in = output->buffer;
2350
2351
0
retry:
2352
2353
0
    written = xmlBufAvail(out);
2354
2355
    /*
2356
     * First specific handling of the initialization call
2357
     */
2358
0
    if (init) {
2359
0
        c_in = 0;
2360
0
        c_out = written;
2361
        /* TODO: Check return value. */
2362
0
        xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2363
0
                          NULL, &c_in);
2364
0
        xmlBufAddLen(out, c_out);
2365
#ifdef DEBUG_ENCODING
2366
  xmlGenericError(xmlGenericErrorContext,
2367
    "initialized encoder\n");
2368
#endif
2369
0
        return(c_out);
2370
0
    }
2371
2372
    /*
2373
     * Conversion itself.
2374
     */
2375
0
    toconv = xmlBufUse(in);
2376
0
    if (toconv == 0)
2377
0
        return (writtentot);
2378
0
    if (toconv > 64 * 1024)
2379
0
        toconv = 64 * 1024;
2380
0
    if (toconv * 4 >= written) {
2381
0
        xmlBufGrow(out, toconv * 4);
2382
0
        written = xmlBufAvail(out);
2383
0
    }
2384
0
    if (written > 256 * 1024)
2385
0
        written = 256 * 1024;
2386
2387
0
    c_in = toconv;
2388
0
    c_out = written;
2389
0
    ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2390
0
                            xmlBufContent(in), &c_in);
2391
0
    xmlBufShrink(in, c_in);
2392
0
    xmlBufAddLen(out, c_out);
2393
0
    writtentot += c_out;
2394
0
    if (ret == -1) {
2395
0
        if (c_out > 0) {
2396
            /* Can be a limitation of iconv or uconv */
2397
0
            goto retry;
2398
0
        }
2399
0
        ret = -3;
2400
0
    }
2401
2402
    /*
2403
     * Attempt to handle error cases
2404
     */
2405
0
    switch (ret) {
2406
0
        case 0:
2407
#ifdef DEBUG_ENCODING
2408
      xmlGenericError(xmlGenericErrorContext,
2409
        "converted %d bytes to %d bytes of output\n",
2410
              c_in, c_out);
2411
#endif
2412
0
      break;
2413
0
        case -1:
2414
#ifdef DEBUG_ENCODING
2415
      xmlGenericError(xmlGenericErrorContext,
2416
        "output conversion failed by lack of space\n");
2417
#endif
2418
0
      break;
2419
0
        case -3:
2420
#ifdef DEBUG_ENCODING
2421
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2422
              c_in, c_out, (int) xmlBufUse(in));
2423
#endif
2424
0
      break;
2425
0
        case -4:
2426
0
            xmlEncodingErr(XML_I18N_NO_OUTPUT,
2427
0
                           "xmlCharEncOutFunc: no output function !\n", NULL);
2428
0
            ret = -1;
2429
0
            break;
2430
0
        case -2: {
2431
0
      xmlChar charref[20];
2432
0
      int len = xmlBufUse(in);
2433
0
            xmlChar *content = xmlBufContent(in);
2434
0
      int cur, charrefLen;
2435
2436
0
      cur = xmlGetUTF8Char(content, &len);
2437
0
      if (cur <= 0)
2438
0
                break;
2439
2440
#ifdef DEBUG_ENCODING
2441
            xmlGenericError(xmlGenericErrorContext,
2442
                    "handling output conversion error\n");
2443
            xmlGenericError(xmlGenericErrorContext,
2444
                    "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2445
                    content[0], content[1],
2446
                    content[2], content[3]);
2447
#endif
2448
            /*
2449
             * Removes the UTF8 sequence, and replace it by a charref
2450
             * and continue the transcoding phase, hoping the error
2451
             * did not mangle the encoder state.
2452
             */
2453
0
            charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2454
0
                             "&#%d;", cur);
2455
0
            xmlBufShrink(in, len);
2456
0
            xmlBufGrow(out, charrefLen * 4);
2457
0
            c_out = xmlBufAvail(out);
2458
0
            c_in = charrefLen;
2459
0
            ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2460
0
                                    charref, &c_in);
2461
2462
0
      if ((ret < 0) || (c_in != charrefLen)) {
2463
0
    char buf[50];
2464
2465
0
    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2466
0
       content[0], content[1],
2467
0
       content[2], content[3]);
2468
0
    buf[49] = 0;
2469
0
    xmlEncodingErr(XML_I18N_CONV_FAILED,
2470
0
        "output conversion failed due to conv error, bytes %s\n",
2471
0
             buf);
2472
0
    content[0] = ' ';
2473
0
                break;
2474
0
      }
2475
2476
0
            xmlBufAddLen(out, c_out);
2477
0
            writtentot += c_out;
2478
0
            goto retry;
2479
0
  }
2480
0
    }
2481
0
    return(writtentot ? writtentot : ret);
2482
0
}
2483
#endif
2484
2485
/**
2486
 * xmlCharEncOutFunc:
2487
 * @handler:  char encoding transformation data structure
2488
 * @out:  an xmlBuffer for the output.
2489
 * @in:  an xmlBuffer for the input
2490
 *
2491
 * Generic front-end for the encoding handler output function
2492
 * a first call with @in == NULL has to be made firs to initiate the
2493
 * output in case of non-stateless encoding needing to initiate their
2494
 * state or the output (like the BOM in UTF16).
2495
 * In case of UTF8 sequence conversion errors for the given encoder,
2496
 * the content will be automatically remapped to a CharRef sequence.
2497
 *
2498
 * Returns the number of byte written if success, or
2499
 *     -1 general error
2500
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2501
 *        the result of transformation can't fit into the encoding we want), or
2502
 */
2503
int
2504
xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2505
0
                  xmlBufferPtr in) {
2506
0
    int ret;
2507
0
    int written;
2508
0
    int writtentot = 0;
2509
0
    int toconv;
2510
2511
0
    if (handler == NULL) return(-1);
2512
0
    if (out == NULL) return(-1);
2513
2514
0
retry:
2515
2516
0
    written = out->size - out->use;
2517
2518
0
    if (written > 0)
2519
0
  written--; /* Gennady: count '/0' */
2520
2521
    /*
2522
     * First specific handling of in = NULL, i.e. the initialization call
2523
     */
2524
0
    if (in == NULL) {
2525
0
        toconv = 0;
2526
        /* TODO: Check return value. */
2527
0
        xmlEncOutputChunk(handler, &out->content[out->use], &written,
2528
0
                          NULL, &toconv);
2529
0
        out->use += written;
2530
0
        out->content[out->use] = 0;
2531
#ifdef DEBUG_ENCODING
2532
  xmlGenericError(xmlGenericErrorContext,
2533
    "initialized encoder\n");
2534
#endif
2535
0
        return(0);
2536
0
    }
2537
2538
    /*
2539
     * Conversion itself.
2540
     */
2541
0
    toconv = in->use;
2542
0
    if (toconv == 0)
2543
0
  return(0);
2544
0
    if (toconv * 4 >= written) {
2545
0
        xmlBufferGrow(out, toconv * 4);
2546
0
  written = out->size - out->use - 1;
2547
0
    }
2548
0
    ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2549
0
                            in->content, &toconv);
2550
0
    xmlBufferShrink(in, toconv);
2551
0
    out->use += written;
2552
0
    writtentot += written;
2553
0
    out->content[out->use] = 0;
2554
0
    if (ret == -1) {
2555
0
        if (written > 0) {
2556
            /* Can be a limitation of iconv or uconv */
2557
0
            goto retry;
2558
0
        }
2559
0
        ret = -3;
2560
0
    }
2561
2562
    /*
2563
     * Attempt to handle error cases
2564
     */
2565
0
    switch (ret) {
2566
0
        case 0:
2567
#ifdef DEBUG_ENCODING
2568
      xmlGenericError(xmlGenericErrorContext,
2569
        "converted %d bytes to %d bytes of output\n",
2570
              toconv, written);
2571
#endif
2572
0
      break;
2573
0
        case -1:
2574
#ifdef DEBUG_ENCODING
2575
      xmlGenericError(xmlGenericErrorContext,
2576
        "output conversion failed by lack of space\n");
2577
#endif
2578
0
      break;
2579
0
        case -3:
2580
#ifdef DEBUG_ENCODING
2581
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2582
              toconv, written, in->use);
2583
#endif
2584
0
      break;
2585
0
        case -4:
2586
0
      xmlEncodingErr(XML_I18N_NO_OUTPUT,
2587
0
               "xmlCharEncOutFunc: no output function !\n", NULL);
2588
0
      ret = -1;
2589
0
            break;
2590
0
        case -2: {
2591
0
      xmlChar charref[20];
2592
0
      int len = in->use;
2593
0
      const xmlChar *utf = (const xmlChar *) in->content;
2594
0
      int cur, charrefLen;
2595
2596
0
      cur = xmlGetUTF8Char(utf, &len);
2597
0
      if (cur <= 0)
2598
0
                break;
2599
2600
#ifdef DEBUG_ENCODING
2601
            xmlGenericError(xmlGenericErrorContext,
2602
                    "handling output conversion error\n");
2603
            xmlGenericError(xmlGenericErrorContext,
2604
                    "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2605
                    in->content[0], in->content[1],
2606
                    in->content[2], in->content[3]);
2607
#endif
2608
            /*
2609
             * Removes the UTF8 sequence, and replace it by a charref
2610
             * and continue the transcoding phase, hoping the error
2611
             * did not mangle the encoder state.
2612
             */
2613
0
            charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2614
0
                             "&#%d;", cur);
2615
0
            xmlBufferShrink(in, len);
2616
0
            xmlBufferGrow(out, charrefLen * 4);
2617
0
      written = out->size - out->use - 1;
2618
0
            toconv = charrefLen;
2619
0
            ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2620
0
                                    charref, &toconv);
2621
2622
0
      if ((ret < 0) || (toconv != charrefLen)) {
2623
0
    char buf[50];
2624
2625
0
    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2626
0
       in->content[0], in->content[1],
2627
0
       in->content[2], in->content[3]);
2628
0
    buf[49] = 0;
2629
0
    xmlEncodingErr(XML_I18N_CONV_FAILED,
2630
0
        "output conversion failed due to conv error, bytes %s\n",
2631
0
             buf);
2632
0
    in->content[0] = ' ';
2633
0
          break;
2634
0
      }
2635
2636
0
            out->use += written;
2637
0
            writtentot += written;
2638
0
            out->content[out->use] = 0;
2639
0
            goto retry;
2640
0
  }
2641
0
    }
2642
0
    return(writtentot ? writtentot : ret);
2643
0
}
2644
2645
/**
2646
 * xmlCharEncCloseFunc:
2647
 * @handler:  char encoding transformation data structure
2648
 *
2649
 * Generic front-end for encoding handler close function
2650
 *
2651
 * Returns 0 if success, or -1 in case of error
2652
 */
2653
int
2654
62
xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2655
62
    int ret = 0;
2656
62
    int tofree = 0;
2657
62
    int i = 0;
2658
2659
62
    if (handler == NULL) return(-1);
2660
2661
172
    for (i = 0; i < (int) NUM_DEFAULT_HANDLERS; i++) {
2662
170
        if (handler == &defaultHandlers[i])
2663
60
            return(0);
2664
170
    }
2665
2666
2
    if (handlers != NULL) {
2667
0
        for (i = 0;i < nbCharEncodingHandler; i++) {
2668
0
            if (handler == handlers[i])
2669
0
                return(0);
2670
0
  }
2671
0
    }
2672
2
#ifdef LIBXML_ICONV_ENABLED
2673
    /*
2674
     * Iconv handlers can be used only once, free the whole block.
2675
     * and the associated icon resources.
2676
     */
2677
2
    if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
2678
2
        tofree = 1;
2679
2
  if (handler->iconv_out != NULL) {
2680
2
      if (iconv_close(handler->iconv_out))
2681
0
    ret = -1;
2682
2
      handler->iconv_out = NULL;
2683
2
  }
2684
2
  if (handler->iconv_in != NULL) {
2685
2
      if (iconv_close(handler->iconv_in))
2686
0
    ret = -1;
2687
2
      handler->iconv_in = NULL;
2688
2
  }
2689
2
    }
2690
2
#endif /* LIBXML_ICONV_ENABLED */
2691
#ifdef LIBXML_ICU_ENABLED
2692
    if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) {
2693
        tofree = 1;
2694
  if (handler->uconv_out != NULL) {
2695
      closeIcuConverter(handler->uconv_out);
2696
      handler->uconv_out = NULL;
2697
  }
2698
  if (handler->uconv_in != NULL) {
2699
      closeIcuConverter(handler->uconv_in);
2700
      handler->uconv_in = NULL;
2701
  }
2702
    }
2703
#endif
2704
2
    if (tofree) {
2705
        /* free up only dynamic handlers iconv/uconv */
2706
2
        if (handler->name != NULL)
2707
2
            xmlFree(handler->name);
2708
2
        handler->name = NULL;
2709
2
        xmlFree(handler);
2710
2
    }
2711
#ifdef DEBUG_ENCODING
2712
    if (ret)
2713
        xmlGenericError(xmlGenericErrorContext,
2714
    "failed to close the encoding handler\n");
2715
    else
2716
        xmlGenericError(xmlGenericErrorContext,
2717
    "closed the encoding handler\n");
2718
#endif
2719
2720
2
    return(ret);
2721
2
}
2722
2723
/**
2724
 * xmlByteConsumed:
2725
 * @ctxt: an XML parser context
2726
 *
2727
 * This function provides the current index of the parser relative
2728
 * to the start of the current entity. This function is computed in
2729
 * bytes from the beginning starting at zero and finishing at the
2730
 * size in byte of the file if parsing a file. The function is
2731
 * of constant cost if the input is UTF-8 but can be costly if run
2732
 * on non-UTF-8 input.
2733
 *
2734
 * Returns the index in bytes from the beginning of the entity or -1
2735
 *         in case the index could not be computed.
2736
 */
2737
long
2738
0
xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2739
0
    xmlParserInputPtr in;
2740
2741
0
    if (ctxt == NULL) return(-1);
2742
0
    in = ctxt->input;
2743
0
    if (in == NULL)  return(-1);
2744
0
    if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2745
0
        unsigned int unused = 0;
2746
0
  xmlCharEncodingHandler * handler = in->buf->encoder;
2747
        /*
2748
   * Encoding conversion, compute the number of unused original
2749
   * bytes from the input not consumed and subtract that from
2750
   * the raw consumed value, this is not a cheap operation
2751
   */
2752
0
        if (in->end - in->cur > 0) {
2753
0
      unsigned char convbuf[32000];
2754
0
      const unsigned char *cur = (const unsigned char *)in->cur;
2755
0
      int toconv = in->end - in->cur, written = 32000;
2756
2757
0
      int ret;
2758
2759
0
            do {
2760
0
                toconv = in->end - cur;
2761
0
                written = 32000;
2762
0
                ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2763
0
                                        cur, &toconv);
2764
0
                if (ret < 0) {
2765
0
                    if (written > 0)
2766
0
                        ret = -2;
2767
0
                    else
2768
0
                        return(-1);
2769
0
                }
2770
0
                unused += written;
2771
0
                cur += toconv;
2772
0
            } while (ret == -2);
2773
0
  }
2774
0
  if (in->buf->rawconsumed < unused)
2775
0
      return(-1);
2776
0
  return(in->buf->rawconsumed - unused);
2777
0
    }
2778
0
    return(in->consumed + (in->cur - in->base));
2779
0
}
2780
2781
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2782
#ifdef LIBXML_ISO8859X_ENABLED
2783
2784
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. Every character yields exactly one output byte;
 * conversion stops early, without error, once @out is full.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 * fails (malformed sequence or character absent from the target set),
 * -3 if the input ends in the middle of a multi-byte sequence, or -1 if
 * @out, @outlen, @inlen or @xlattable is NULL.
 * The value of @inlen after return is the number of octets consumed,
 *     counting only complete characters that were converted.
 * The value of @outlen after return is the number of octets produced.
 * Neither is updated when -1 is returned.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              const unsigned char* const xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;
    int ret = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
        * initialization nothing to do
        */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    outend = out + (*outlen);
    inend = in + (*inlen);
    while (in < inend) {
        unsigned char d;

        /* Never write beyond the space the caller announced. */
        if (out >= outend)
            break;

        d = *in++;
        if (d < 0x80) {
            /* ASCII maps to itself */
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            break;
        } else if (d < 0xE0) {
            unsigned char c;

            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                ret = -3;
                break;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                ret = -2;
                break;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            /* level 1: lead byte selects a block, level 2: byte in block */
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;

            if (inend - in < 2) {
                /* trailing bytes not in input buffer */
                ret = -3;
                break;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                ret = -2;
                break;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                ret = -2;
                break;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            /* three chained lookups: lead byte, then c1, then c2 */
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                        xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
        } else {
            /* cannot transcode >= U+010000 */
            ret = -2;
            break;
        }
        *out++ = d;
        /* only now is the whole character accounted as consumed */
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return((ret < 0) ? ret : *outlen);
}
2902
2903
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable:  code points for the bytes 0x80..0xFF, indexed by
 *                 (byte - 0x80); an entry of 0 marks an undefined byte
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. Conversion stops early, without error, when @out
 * has no room left for the next character.
 *
 * Returns the number of bytes written if success, or -1 if an argument
 * is NULL or an undefined byte is met.
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 * Neither is updated when an argument is NULL.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* instop;
    unsigned int c;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    instop = inend;

    /*
     * Keep at least three free output bytes so a high byte can always be
     * expanded. Comparing distances avoids forming a pointer in front of
     * the buffer when fewer than two bytes are available.
     */
    while ((in < inend) && (outend - out > 2)) {
        if (*in >= 0x80) {
            c = unicodetable [*in - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
            ++in;
        }
        /* Limit the ASCII run to what still fits in the output. */
        if (instop - in > outend - out) instop = in + (outend - out);
        /* Test the bound first so *in is never read past the input. */
        while ((in < instop) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }
    /* Up to two ASCII bytes still fit in the space kept in reserve. */
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ =  *in++;
    }
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ =  *in++;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (*outlen);
}
2968
2969
2970
/************************************************************************
2971
 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
2972
 ************************************************************************/
2973
2974
/*
 * ISO-8859-2 to Unicode: 128 code points, laid out the way
 * ISO8859xToUTF8() indexes its @unicodetable argument (byte - 0x80, so
 * one entry per byte value 0x80..0xFF). That function treats an entry
 * of 0x0000 as an undefined byte.
 * NOTE(review): the caller handing this table to ISO8859xToUTF8() is
 * outside this excerpt - confirm.
 */
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
2975
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2976
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2977
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2978
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2979
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
2980
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
2981
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
2982
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
2983
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
2984
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
2985
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
2986
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
2987
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
2988
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
2989
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
2990
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
2991
};
2992
2993
/*
 * Unicode to ISO-8859-2, in the 2-level layout UTF8ToISO8859x() reads
 * through its @xlattable argument: entries 0..31 pick a 64-byte block
 * from the low 5 bits of a two-byte lead, entries 32..47 pick one from
 * the low 4 bits of a three-byte lead, and the blocks start at offset
 * 48. A final lookup result of 0 means "not in the character set".
 * NOTE(review): the caller handing this table to UTF8ToISO8859x() is
 * outside this excerpt - confirm.
 */
static const unsigned char xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
2994
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
2995
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2996
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2997
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2998
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2999
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3000
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3001
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3002
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3003
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3004
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3005
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
3006
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
3007
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3008
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
3009
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3010
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
3011
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3012
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3013
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
3014
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
3015
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
3016
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
3017
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3018
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
3019
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
3020
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
3021
};
3022
3023
/*
 * ISO-8859-3 to Unicode: 128 code points, laid out the way
 * ISO8859xToUTF8() indexes its @unicodetable argument (byte - 0x80, so
 * one entry per byte value 0x80..0xFF). That function treats an entry
 * of 0x0000 as an undefined byte.
 * NOTE(review): the caller handing this table to ISO8859xToUTF8() is
 * outside this excerpt - confirm.
 */
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
3024
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3025
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3026
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3027
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3028
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
3029
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
3030
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
3031
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
3032
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
3033
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3034
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
3035
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
3036
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
3037
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3038
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
3039
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
3040
};
3041
3042
/*
 * Unicode to ISO-8859-3, in the 2-level layout UTF8ToISO8859x() reads
 * through its @xlattable argument: entries 0..31 pick a 64-byte block
 * from the low 5 bits of a two-byte lead, entries 32..47 pick one from
 * the low 4 bits of a three-byte lead, and the blocks start at offset
 * 48. A final lookup result of 0 means "not in the character set".
 * NOTE(review): the caller handing this table to UTF8ToISO8859x() is
 * outside this excerpt - confirm.
 */
static const unsigned char xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
3043
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3044
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3045
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3046
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3047
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3048
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3049
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3050
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3051
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3052
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3053
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
3054
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
3055
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
3056
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
3057
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3058
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3059
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
3060
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3061
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3062
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3063
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3064
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3065
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3066
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3067
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
3068
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
3069
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
3070
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3071
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3072
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3073
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
3074
};
3075
3076
/*
 * ISO-8859-4 to Unicode: 128 code points, laid out the way
 * ISO8859xToUTF8() indexes its @unicodetable argument (byte - 0x80, so
 * one entry per byte value 0x80..0xFF). That function treats an entry
 * of 0x0000 as an undefined byte.
 * NOTE(review): the caller handing this table to ISO8859xToUTF8() is
 * outside this excerpt - confirm.
 */
static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
3077
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3078
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3079
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3080
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3081
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
3082
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
3083
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
3084
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
3085
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3086
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
3087
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3088
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
3089
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3090
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
3091
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3092
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
3093
};
3094
3095
/*
 * Unicode to ISO-8859-4, in the 2-level layout UTF8ToISO8859x() reads
 * through its @xlattable argument: entries 0..31 pick a 64-byte block
 * from the low 5 bits of a two-byte lead, entries 32..47 pick one from
 * the low 4 bits of a three-byte lead, and the blocks start at offset
 * 48. A final lookup result of 0 means "not in the character set".
 * NOTE(review): the caller handing this table to UTF8ToISO8859x() is
 * outside this excerpt - confirm.
 */
static const unsigned char xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
3096
    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
3097
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3098
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3099
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3100
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3101
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3102
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3103
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3104
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3105
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
3106
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3107
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3108
    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3109
    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
3110
    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
3111
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
3112
    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
3113
    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
3114
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
3115
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3116
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
3117
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3118
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3119
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3120
    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
3121
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
3122
    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
3123
};
3124
3125
/*
 * ISO-8859-5 to Unicode: 128 code points, laid out the way
 * ISO8859xToUTF8() indexes its @unicodetable argument (byte - 0x80, so
 * one entry per byte value 0x80..0xFF). That function treats an entry
 * of 0x0000 as an undefined byte.
 * NOTE(review): the caller handing this table to ISO8859xToUTF8() is
 * outside this excerpt - confirm.
 */
static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
3126
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3127
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3128
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3129
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3130
    0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
3131
    0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
3132
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
3133
    0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
3134
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
3135
    0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
3136
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
3137
    0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
3138
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
3139
    0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
3140
    0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
3141
    0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
3142
};
3143
3144
/*
 * Unicode to ISO-8859-5, in the 2-level layout UTF8ToISO8859x() reads
 * through its @xlattable argument: entries 0..31 pick a 64-byte block
 * from the low 5 bits of a two-byte lead, entries 32..47 pick one from
 * the low 4 bits of a three-byte lead, and the blocks start at offset
 * 48. A final lookup result of 0 means "not in the character set".
 * NOTE(review): the caller handing this table to UTF8ToISO8859x() is
 * outside this excerpt - confirm.
 */
static const unsigned char xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
3145
    "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3146
    "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3147
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3148
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3149
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3150
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3151
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3152
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3153
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3154
    "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
3155
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3156
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
3157
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3158
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3159
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3160
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3161
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
3162
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3163
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3164
    "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3165
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3166
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3167
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3168
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3169
    "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3170
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3171
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3172
};
3173
3174
/*
 * ISO-8859-6 to Unicode: 128 code points, laid out the way
 * ISO8859xToUTF8() indexes its @unicodetable argument (byte - 0x80, so
 * one entry per byte value 0x80..0xFF). That function treats an entry
 * of 0x0000 as an undefined byte.
 * NOTE(review): the caller handing this table to ISO8859xToUTF8() is
 * outside this excerpt - confirm.
 */
static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
3175
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3176
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3177
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3178
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3179
    0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
3180
    0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
3181
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3182
    0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
3183
    0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
3184
    0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3185
    0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3186
    0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3187
    0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3188
    0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3189
    0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3190
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3191
};
3192
3193
/*
 * Unicode to ISO-8859-6, in the 2-level layout UTF8ToISO8859x() reads
 * through its @xlattable argument: entries 0..31 pick a 64-byte block
 * from the low 5 bits of a two-byte lead, entries 32..47 pick one from
 * the low 4 bits of a three-byte lead, and the blocks start at offset
 * 48. A final lookup result of 0 means "not in the character set".
 * NOTE(review): the caller handing this table to UTF8ToISO8859x() is
 * outside this excerpt - confirm.
 */
static const unsigned char xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
3194
    "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3195
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3196
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3197
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3198
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3199
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3200
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3201
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3202
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3203
    "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3204
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3205
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3206
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3207
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3208
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3209
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3210
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3211
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3212
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3213
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3214
    "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3215
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3216
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3217
};
3218
3219
/*
 * ISO-8859-7 to Unicode: 128 code points, laid out the way
 * ISO8859xToUTF8() indexes its @unicodetable argument (byte - 0x80, so
 * one entry per byte value 0x80..0xFF). That function treats an entry
 * of 0x0000 as an undefined byte.
 * NOTE(review): the caller handing this table to ISO8859xToUTF8() is
 * outside this excerpt - confirm.
 */
static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
3220
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3221
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3222
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3223
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3224
    0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3225
    0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3226
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3227
    0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3228
    0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3229
    0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3230
    0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3231
    0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3232
    0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3233
    0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3234
    0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3235
    0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3236
};
3237
3238
/*
 * xmltranscodetable_ISO8859_7:
 *
 * 48 + 7 * 64 = 496 bytes, written as 31 string-literal fragments of
 * 16 bytes each. The concatenated literal fills the array exactly, so
 * no terminating NUL is stored.
 * NOTE(review): presumably the reverse (Unicode -> ISO-8859-7) lookup
 * data used by UTF8ToISO8859x, laid out as a 48-byte index followed by
 * seven 64-byte blocks as the size expression suggests. The lookup code
 * is not visible in this part of the file -- confirm before editing.
 */
static const unsigned char xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3239
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3240
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3241
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3242
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3243
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3244
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3245
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3246
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3247
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3248
    "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3249
    "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3250
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3251
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3252
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3253
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3254
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3255
    "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3256
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3257
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3258
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3259
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3260
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3261
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3262
    "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3263
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3264
    "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3265
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3266
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3267
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3268
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3269
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3270
};
3271
3272
/*
 * xmlunicodetable_ISO8859_8:
 *
 * 128-entry table of 16-bit values. The first 32 entries are
 * 0x0080..0x009f in ascending order. Entries 64..94 are all 0x0000 and
 * entries 96..122 are 0x05d0..0x05ea in ascending order.
 * NOTE(review): presumably entry i is the Unicode code point for
 * ISO-8859-8 byte 0x80 + i, with 0x0000 marking bytes that have no
 * mapping. The code reading this table is not visible in this part of
 * the file -- confirm against ISO8859xToUTF8 before editing by hand.
 */
static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
3273
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3274
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3275
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3276
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3277
    0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3278
    0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3279
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3280
    0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3281
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3282
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3283
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3284
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3285
    0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3286
    0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3287
    0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3288
    0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3289
};
3290
3291
/*
 * xmltranscodetable_ISO8859_8:
 *
 * 48 + 7 * 64 = 496 bytes, written as 31 string-literal fragments of
 * 16 bytes each. The concatenated literal fills the array exactly, so
 * no terminating NUL is stored.
 * NOTE(review): presumably the reverse (Unicode -> ISO-8859-8) lookup
 * data used by UTF8ToISO8859x, laid out as a 48-byte index followed by
 * seven 64-byte blocks as the size expression suggests. The lookup code
 * is not visible in this part of the file -- confirm before editing.
 */
static const unsigned char xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3292
    "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3293
    "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3294
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3295
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3296
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3297
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3298
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3299
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3300
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3301
    "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3302
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3303
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3304
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3305
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3306
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3307
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3308
    "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3309
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3310
    "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3311
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3312
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3313
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3314
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3315
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3316
    "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3317
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3318
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3319
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3320
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3321
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3322
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3323
};
3324
3325
/*
 * xmlunicodetable_ISO8859_9:
 *
 * 128-entry table of 16-bit values. Every entry i holds 0x0080 + i
 * except six: 0x011e, 0x0130, 0x015e, 0x011f, 0x0131 and 0x015f.
 * No entry is 0x0000.
 * NOTE(review): presumably entry i is the Unicode code point for
 * ISO-8859-9 byte 0x80 + i. The code reading this table is not visible
 * in this part of the file -- confirm against ISO8859xToUTF8 before
 * editing by hand.
 */
static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
3326
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3327
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3328
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3329
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3330
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3331
    0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3332
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3333
    0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3334
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3335
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3336
    0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3337
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3338
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3339
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3340
    0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3341
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3342
};
3343
3344
/*
 * xmltranscodetable_ISO8859_9:
 *
 * 48 + 5 * 64 = 368 bytes, written as 23 string-literal fragments of
 * 16 bytes each. The concatenated literal fills the array exactly, so
 * no terminating NUL is stored.
 * NOTE(review): presumably the reverse (Unicode -> ISO-8859-9) lookup
 * data used by UTF8ToISO8859x, laid out as a 48-byte index followed by
 * five 64-byte blocks as the size expression suggests. The lookup code
 * is not visible in this part of the file -- confirm before editing.
 */
static const unsigned char xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3345
    "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3346
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3347
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3348
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3349
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3350
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3351
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3352
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3353
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3354
    "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3355
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3356
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3357
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3358
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3359
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3360
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3361
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3362
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3363
    "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3364
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3365
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3366
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3367
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3368
};
3369
3370
/*
 * xmlunicodetable_ISO8859_10:
 *
 * 128-entry table of 16-bit values. The first 32 entries are
 * 0x0080..0x009f in ascending order; no entry is 0x0000.
 * NOTE(review): presumably entry i is the Unicode code point for
 * ISO-8859-10 byte 0x80 + i. The code reading this table is not visible
 * in this part of the file -- confirm against ISO8859xToUTF8 before
 * editing by hand.
 */
static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3371
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3372
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3373
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3374
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3375
    0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3376
    0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3377
    0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3378
    0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3379
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3380
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3381
    0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3382
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3383
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3384
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3385
    0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3386
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3387
};
3388
3389
/*
 * xmltranscodetable_ISO8859_10:
 *
 * 48 + 7 * 64 = 496 bytes, written as 31 string-literal fragments of
 * 16 bytes each. The concatenated literal fills the array exactly, so
 * no terminating NUL is stored.
 * NOTE(review): presumably the reverse (Unicode -> ISO-8859-10) lookup
 * data used by UTF8ToISO8859x, laid out as a 48-byte index followed by
 * seven 64-byte blocks as the size expression suggests. The lookup code
 * is not visible in this part of the file -- confirm before editing.
 */
static const unsigned char xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3390
    "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3391
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3392
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3393
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3394
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3395
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3396
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3397
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3398
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3399
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3400
    "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3401
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3402
    "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3403
    "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3404
    "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3405
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3406
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3407
    "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3408
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3409
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3410
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3411
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3412
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3413
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3414
    "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3415
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3416
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3417
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3418
    "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3419
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3420
    "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3421
};
3422
3423
/*
 * xmlunicodetable_ISO8859_11:
 *
 * 128-entry table of 16-bit values. The first 33 entries are
 * 0x0080..0x00a0 in ascending order; the rest are values in the range
 * 0x0e01..0x0e5b, with 0x0000 at eight positions (entries 91..94 and
 * 124..127).
 * NOTE(review): presumably entry i is the Unicode code point for
 * ISO-8859-11 byte 0x80 + i, with 0x0000 marking bytes that have no
 * mapping. The code reading this table is not visible in this part of
 * the file -- confirm against ISO8859xToUTF8 before editing by hand.
 */
static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3424
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3425
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3426
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3427
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3428
    0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3429
    0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3430
    0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3431
    0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3432
    0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3433
    0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3434
    0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3435
    0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3436
    0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3437
    0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3438
    0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3439
    0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3440
};
3441
3442
/*
 * xmltranscodetable_ISO8859_11:
 *
 * 48 + 6 * 64 = 432 bytes, written as 27 string-literal fragments of
 * 16 bytes each. The concatenated literal fills the array exactly, so
 * no terminating NUL is stored.
 * NOTE(review): presumably the reverse (Unicode -> ISO-8859-11) lookup
 * data used by UTF8ToISO8859x, laid out as a 48-byte index followed by
 * six 64-byte blocks as the size expression suggests. The lookup code
 * is not visible in this part of the file -- confirm before editing.
 */
static const unsigned char xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3443
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3444
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3445
    "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3446
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3447
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3448
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3449
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3450
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3451
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3452
    "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3453
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3454
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3455
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3456
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3457
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3458
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3459
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3460
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3461
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3462
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3463
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3464
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3465
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3466
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3467
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3468
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3469
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3470
};
3471
3472
/*
 * xmlunicodetable_ISO8859_13:
 *
 * 128-entry table of 16-bit values. The first 32 entries are
 * 0x0080..0x009f in ascending order; no entry is 0x0000.
 * NOTE(review): presumably entry i is the Unicode code point for
 * ISO-8859-13 byte 0x80 + i. The code reading this table is not visible
 * in this part of the file -- confirm against ISO8859xToUTF8 before
 * editing by hand.
 */
static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3473
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3474
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3475
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3476
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3477
    0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3478
    0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3479
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3480
    0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3481
    0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3482
    0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3483
    0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3484
    0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3485
    0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3486
    0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3487
    0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3488
    0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3489
};
3490
3491
/*
 * xmltranscodetable_ISO8859_13:
 *
 * 48 + 7 * 64 = 496 bytes, written as 31 string-literal fragments of
 * 16 bytes each. The concatenated literal fills the array exactly, so
 * no terminating NUL is stored.
 * NOTE(review): presumably the reverse (Unicode -> ISO-8859-13) lookup
 * data used by UTF8ToISO8859x, laid out as a 48-byte index followed by
 * seven 64-byte blocks as the size expression suggests. The lookup code
 * is not visible in this part of the file -- confirm before editing.
 */
static const unsigned char xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3492
    "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3493
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3494
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3495
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3496
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3497
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3498
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3499
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3500
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3501
    "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3502
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3503
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3504
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3505
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3506
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3507
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3508
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3509
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3510
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3511
    "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3512
    "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3513
    "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3514
    "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3515
    "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3516
    "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3517
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3518
    "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3519
    "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3520
    "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3521
    "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3522
    "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3523
};
3524
3525
/*
 * xmlunicodetable_ISO8859_14:
 *
 * 128-entry table of 16-bit values. The first 32 entries are
 * 0x0080..0x009f in ascending order; no entry is 0x0000.
 * NOTE(review): presumably entry i is the Unicode code point for
 * ISO-8859-14 byte 0x80 + i. The code reading this table is not visible
 * in this part of the file -- confirm against ISO8859xToUTF8 before
 * editing by hand.
 */
static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
3526
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3527
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3528
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3529
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3530
    0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3531
    0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3532
    0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3533
    0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3534
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3535
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3536
    0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3537
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3538
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3539
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3540
    0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3541
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
3542
};
3543
3544
/*
 * xmltranscodetable_ISO8859_14:
 *
 * 48 + 10 * 64 = 688 bytes, written as 43 string-literal fragments of
 * 16 bytes each. The concatenated literal fills the array exactly, so
 * no terminating NUL is stored.
 * NOTE(review): presumably the reverse (Unicode -> ISO-8859-14) lookup
 * data used by UTF8ToISO8859x, laid out as a 48-byte index followed by
 * ten 64-byte blocks as the size expression suggests. The lookup code
 * is not visible in this part of the file -- confirm before editing.
 */
static const unsigned char xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
3545
    "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3546
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3547
    "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3548
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3549
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3550
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3551
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3552
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3553
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3554
    "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3555
    "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3556
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3557
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3558
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3559
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3560
    "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3561
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3562
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3563
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3564
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3565
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3566
    "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3567
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3568
    "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3569
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3570
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3571
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3572
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3573
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3574
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3575
    "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3576
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3577
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3578
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3579
    "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3580
    "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3581
    "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3582
    "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3583
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3584
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3585
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3586
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3587
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3588
};
3589
3590
/*
 * xmlunicodetable_ISO8859_15:
 *
 * 128-entry table of 16-bit values. Every entry i holds 0x0080 + i
 * except eight: 0x20ac, 0x0160, 0x0161, 0x017d, 0x017e, 0x0152, 0x0153
 * and 0x0178. No entry is 0x0000.
 * NOTE(review): presumably entry i is the Unicode code point for
 * ISO-8859-15 byte 0x80 + i. The code reading this table is not visible
 * in this part of the file -- confirm against ISO8859xToUTF8 before
 * editing by hand.
 */
static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
3591
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3592
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3593
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3594
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3595
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3596
    0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3597
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3598
    0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3599
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3600
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3601
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3602
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3603
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3604
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3605
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3606
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
3607
};
3608
3609
/*
 * xmltranscodetable_ISO8859_15:
 *
 * 48 + 6 * 64 = 432 bytes, written as 27 string-literal fragments of
 * 16 bytes each. The concatenated literal fills the array exactly, so
 * no terminating NUL is stored.
 * NOTE(review): presumably the reverse (Unicode -> ISO-8859-15) lookup
 * data used by UTF8ToISO8859x, laid out as a 48-byte index followed by
 * six 64-byte blocks as the size expression suggests. The lookup code
 * is not visible in this part of the file -- confirm before editing.
 */
static const unsigned char xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
3610
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3611
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3612
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3613
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3614
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3615
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3616
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3617
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3618
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3619
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3620
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3621
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3622
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3623
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3624
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3625
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3626
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3627
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3628
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3629
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3630
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3631
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3632
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3633
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3634
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3635
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3636
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
3637
};
3638
3639
/*
 * xmlunicodetable_ISO8859_16:
 *
 * 128-entry table of 16-bit values. The first 32 entries are
 * 0x0080..0x009f in ascending order; no entry is 0x0000.
 * NOTE(review): presumably entry i is the Unicode code point for
 * ISO-8859-16 byte 0x80 + i. The code reading this table is not visible
 * in this part of the file -- confirm against ISO8859xToUTF8 before
 * editing by hand.
 */
static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
3640
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3641
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3642
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3643
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3644
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3645
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3646
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3647
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3648
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3649
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3650
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3651
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3652
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3653
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3654
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3655
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
3656
};
3657
3658
/*
 * xmltranscodetable_ISO8859_16:
 *
 * 48 + 9 * 64 = 624 bytes, written as 39 string-literal fragments of
 * 16 bytes each. The concatenated literal fills the array exactly, so
 * no terminating NUL is stored.
 * NOTE(review): presumably the reverse (Unicode -> ISO-8859-16) lookup
 * data used by UTF8ToISO8859x, laid out as a 48-byte index followed by
 * nine 64-byte blocks as the size expression suggests. The lookup code
 * is not visible in this part of the file -- confirm before editing.
 */
static const unsigned char xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
3659
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3660
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3661
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3662
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3663
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3664
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3665
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3666
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3667
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3668
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3669
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3670
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3671
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3672
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3673
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3674
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3675
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3676
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3677
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3678
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3679
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3680
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3681
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3682
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3683
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3684
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3685
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3686
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3687
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3688
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3689
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3690
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3691
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3692
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3693
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3694
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3695
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3696
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3697
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3698
};
3699
3700
3701
/*
3702
 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3703
 */
3704
3705
static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3706
    const unsigned char* in, int *inlen) {
3707
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3708
}
3709
static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3710
    const unsigned char* in, int *inlen) {
3711
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3712
}
3713
3714
static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3715
    const unsigned char* in, int *inlen) {
3716
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3717
}
3718
static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3719
    const unsigned char* in, int *inlen) {
3720
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3721
}
3722
3723
static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3724
    const unsigned char* in, int *inlen) {
3725
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3726
}
3727
static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3728
    const unsigned char* in, int *inlen) {
3729
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3730
}
3731
3732
static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3733
    const unsigned char* in, int *inlen) {
3734
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3735
}
3736
static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3737
    const unsigned char* in, int *inlen) {
3738
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3739
}
3740
3741
static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3742
    const unsigned char* in, int *inlen) {
3743
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3744
}
3745
static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3746
    const unsigned char* in, int *inlen) {
3747
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3748
}
3749
3750
static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3751
    const unsigned char* in, int *inlen) {
3752
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3753
}
3754
static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3755
    const unsigned char* in, int *inlen) {
3756
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3757
}
3758
3759
static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3760
    const unsigned char* in, int *inlen) {
3761
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3762
}
3763
static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3764
    const unsigned char* in, int *inlen) {
3765
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3766
}
3767
3768
static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3769
    const unsigned char* in, int *inlen) {
3770
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3771
}
3772
static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3773
    const unsigned char* in, int *inlen) {
3774
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3775
}
3776
3777
static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3778
    const unsigned char* in, int *inlen) {
3779
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3780
}
3781
static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3782
    const unsigned char* in, int *inlen) {
3783
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3784
}
3785
3786
static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3787
    const unsigned char* in, int *inlen) {
3788
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3789
}
3790
static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3791
    const unsigned char* in, int *inlen) {
3792
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3793
}
3794
3795
static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3796
    const unsigned char* in, int *inlen) {
3797
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3798
}
3799
static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3800
    const unsigned char* in, int *inlen) {
3801
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3802
}
3803
3804
static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3805
    const unsigned char* in, int *inlen) {
3806
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3807
}
3808
static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3809
    const unsigned char* in, int *inlen) {
3810
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3811
}
3812
3813
static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3814
    const unsigned char* in, int *inlen) {
3815
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3816
}
3817
static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3818
    const unsigned char* in, int *inlen) {
3819
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3820
}
3821
3822
static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3823
    const unsigned char* in, int *inlen) {
3824
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3825
}
3826
static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3827
    const unsigned char* in, int *inlen) {
3828
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3829
}
3830
3831
#endif
3832
#endif
3833