Coverage Report

Created: 2026-04-29 07:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libxml2-2.9.7/encoding.c
Line
Count
Source
1
/*
2
 * encoding.c : implements the encoding conversion functions needed for XML
3
 *
4
 * Related specs:
5
 * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6
 * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7
 * [ISO-10646]    UTF-8 and UTF-16 in Annexes
8
 * [ISO-8859-1]   ISO Latin-1 characters codes.
9
 * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
10
 *                Worldwide Character Encoding -- Version 1.0", Addison-
11
 *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
12
 *                described in Unicode Technical Report #4.
13
 * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
14
 *                Information Interchange, ANSI X3.4-1986.
15
 *
16
 * See Copyright for the status of this software.
17
 *
18
 * daniel@veillard.com
19
 *
20
 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
21
 */
22
23
#define IN_LIBXML
24
#include "libxml.h"
25
26
#include <string.h>
27
#include <limits.h>
28
29
#ifdef HAVE_CTYPE_H
30
#include <ctype.h>
31
#endif
32
#ifdef HAVE_STDLIB_H
33
#include <stdlib.h>
34
#endif
35
#ifdef LIBXML_ICONV_ENABLED
36
#ifdef HAVE_ERRNO_H
37
#include <errno.h>
38
#endif
39
#endif
40
#include <libxml/encoding.h>
41
#include <libxml/xmlmemory.h>
42
#ifdef LIBXML_HTML_ENABLED
43
#include <libxml/HTMLparser.h>
44
#endif
45
#include <libxml/globals.h>
46
#include <libxml/xmlerror.h>
47
48
#include "buf.h"
49
#include "enc.h"
50
51
static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
52
static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
53
54
/* Typedefs for the alias record type and for a pointer to it. */
typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
55
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
56
/*
 * One alias record made of two constant string pointers.
 * NOTE(review): presumably @name is the real encoding name and @alias the
 * alternate spelling mapped to it -- confirm against the alias routines.
 */
struct _xmlCharEncodingAlias {
57
    const char *name;
58
    const char *alias;
59
};
60
61
/*
 * File-local state for the alias records: a pointer that starts out NULL
 * and two integer counters that start at 0.
 * NOTE(review): Nb/Max look like "entries in use" and "allocated capacity";
 * verify against the code that grows the table.
 */
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
62
static int xmlCharEncodingAliasesNb = 0;
63
static int xmlCharEncodingAliasesMax = 0;
64
65
#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
66
#if 0
67
#define DEBUG_ENCODING  /* Define this to get encoding traces */
68
#endif
69
#else
70
#ifdef LIBXML_ISO8859X_ENABLED
71
static void xmlRegisterCharEncodingHandlersISO8859x (void);
72
#endif
73
#endif
74
75
static int xmlLittleEndian = 1;
76
77
/**
78
 * xmlEncodingErrMemory:
79
 * @extra:  extra informations
80
 *
81
 * Handle an out of memory condition
82
 */
83
static void
84
xmlEncodingErrMemory(const char *extra)
85
0
{
86
0
    __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
87
0
}
88
89
/**
90
 * xmlErrEncoding:
91
 * @error:  the error number
92
 * @msg:  the error message
93
 *
94
 * n encoding error
95
 */
96
static void LIBXML_ATTR_FORMAT(2,0)
97
xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
98
5.81k
{
99
5.81k
    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
100
5.81k
                    XML_FROM_I18N, error, XML_ERR_FATAL,
101
5.81k
                    NULL, 0, val, NULL, NULL, 0, 0, msg, val);
102
5.81k
}
103
104
#ifdef LIBXML_ICU_ENABLED
105
static uconv_t*
106
openIcuConverter(const char* name, int toUnicode)
107
{
108
  UErrorCode status = U_ZERO_ERROR;
109
  uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
110
  if (conv == NULL)
111
    return NULL;
112
113
  conv->uconv = ucnv_open(name, &status);
114
  if (U_FAILURE(status))
115
    goto error;
116
117
  status = U_ZERO_ERROR;
118
  if (toUnicode) {
119
    ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
120
                        NULL, NULL, NULL, &status);
121
  }
122
  else {
123
    ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
124
                        NULL, NULL, NULL, &status);
125
  }
126
  if (U_FAILURE(status))
127
    goto error;
128
129
  status = U_ZERO_ERROR;
130
  conv->utf8 = ucnv_open("UTF-8", &status);
131
  if (U_SUCCESS(status))
132
    return conv;
133
134
error:
135
  if (conv->uconv)
136
    ucnv_close(conv->uconv);
137
  xmlFree(conv);
138
  return NULL;
139
}
140
141
static void
142
closeIcuConverter(uconv_t *conv)
143
{
144
  if (conv != NULL) {
145
    ucnv_close(conv->uconv);
146
    ucnv_close(conv->utf8);
147
    xmlFree(conv);
148
  }
149
}
150
#endif /* LIBXML_ICU_ENABLED */
151
152
/************************************************************************
153
 *                  *
154
 *    Conversions To/From UTF8 encoding     *
155
 *                  *
156
 ************************************************************************/
157
158
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Copy a block of ASCII chars to a UTF-8 block. Conversion stops at the
 * end of the input or as soon as fewer than six bytes of the original
 * @outlen budget remain.
 *
 * Returns the number of bytes written, or -1 if a byte >= 0x80 is met.
 * In both cases @outlen is set to the number of bytes written and @inlen
 * to the number of input bytes consumed before stopping.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const int capacity = *outlen;
    const int available = *inlen;
    int written = 0;
    int consumed = 0;

    while ((consumed < available) && (written + 5 < capacity)) {
        unsigned int ch = in[consumed];

        if (ch >= 0x80) {
            /* not ASCII: report progress so far and fail */
            *outlen = written;
            *inlen = consumed;
            return(-1);
        }
        out[written++] = (unsigned char) ch;
        consumed++;
    }

    *outlen = written;
    *inlen = consumed;
    return(written);
}
202
203
#ifdef LIBXML_OUTPUT_ENABLED
204
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out.
 *
 * A NULL @in is an initialization call: both lengths are set to 0.
 * An incomplete multi-byte sequence at the end of @in, or a full output
 * buffer, stops the conversion without error; the unconverted tail is
 * simply not counted in @inlen.
 *
 * Returns the number of bytes written, -2 if the input is malformed or
 *     holds a character that cannot be represented in ASCII, or -1 if
 *     @out, @outlen or @inlen is NULL.
 * On return (other than -1) @outlen is the number of bytes written and
 * @inlen the number of input bytes fully converted.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c= d; trailing= 0; }
        else if (d < 0xC0)  goto invalid;   /* trailing byte in leading position */
        else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
        else                goto invalid;   /* no chance for this in Ascii */

        /* sequence cut by the end of the buffer: wait for more input */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /*
             * A non-continuation byte makes the sequence malformed. Fail
             * instead of carrying on with a half-built code point, which
             * would emit a bogus byte and swallow the offending one.
             */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* c now holds one decoded code point */
        if (c >= 0x80)
            goto invalid;                   /* no chance for this in Ascii */
        if (out >= outend)
            break;
        *out++ = c;
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(-2);
}
287
#endif /* LIBXML_OUTPUT_ENABLED */
288
289
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and convert it to a UTF-8 block
 * of chars out. Bytes below 0x80 are copied as is, the others become a
 * two byte sequence. Conversion stops at the first input byte whose
 * encoding no longer fits in the remaining output space.
 *
 * Returns the number of bytes written, or -1 if any argument is NULL.
 * On success @outlen is the number of bytes written and @inlen the number
 * of input bytes consumed.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    int capacity;
    int available;
    int written = 0;
    int consumed = 0;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    capacity = *outlen;
    available = *inlen;

    while (consumed < available) {
        const unsigned char ch = in[consumed];
        const int room = capacity - written;

        if (ch < 0x80) {
            if (room < 1)
                break;
            out[written++] = ch;
        } else {
            if (room < 2)
                break;
            out[written++] = ((ch >> 6) & 0x1F) | 0xC0;
            out[written++] = (ch & 0x3F) | 0x80;
        }
        consumed++;
    }

    *outlen = written;
    *inlen = consumed;
    return(written);
}
337
338
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * No op copy operation for UTF8 handling: copies as many bytes as fit,
 * i.e. the smaller of *@outlen and *@inlenb.
 *
 * A NULL @inb is an initialization call: both lengths are set to 0.
 *
 * Returns the number of bytes copied, or -1 if @out, @outlen or @inlenb
 *     is NULL or if the smaller length is negative.
 *     On success both *@outlen and *@inlenb are set to the copied count.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int count;

    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);

    if (inb == NULL) {
        /* inb == NULL means output is initialized. */
        *outlen = 0;
        *inlenb = 0;
        return(0);
    }

    count = (*outlen > *inlenb) ? *inlenb : *outlen;
    if (count < 0)
        return(-1);

    memcpy(out, inb, count);

    *inlenb = count;
    *outlen = count;
    return(count);
}
379
380
381
#ifdef LIBXML_OUTPUT_ENABLED
382
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out.
 *
 * A NULL @in is an initialization call: both lengths are set to 0.
 * An incomplete sequence at the end of @in, or a full output buffer,
 * stops the conversion without error.
 *
 * Returns the number of bytes written if success, -2 if the input is
 *     malformed or holds a character above 0xFF, or -1 if @out, @outlen
 *     or @inlen is NULL.
 * On return (other than -1) @outlen is the number of bytes written and
 * @inlen the number of input bytes fully converted.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char *src;
    const unsigned char *srcEnd;
    const unsigned char *committed;
    unsigned char *dst;
    const unsigned char *dstEnd;
    int malformed = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /* initialization: nothing to emit */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    src = in;
    committed = in;
    srcEnd = in + (*inlen);
    dst = out;
    dstEnd = out + (*outlen);

    while (src < srcEnd) {
        unsigned int lead = *src++;
        unsigned int cp;
        int extra;

        if (lead < 0x80) {
            cp = lead;
            extra = 0;
        } else if (lead < 0xC0) {
            /* trailing byte in leading position */
            malformed = 1;
            break;
        } else if (lead < 0xE0) {
            cp = lead & 0x1F;
            extra = 1;
        } else if (lead < 0xF0) {
            cp = lead & 0x0F;
            extra = 2;
        } else if (lead < 0xF8) {
            cp = lead & 0x07;
            extra = 3;
        } else {
            /* no chance for this in IsoLat1 */
            malformed = 1;
            break;
        }

        /* sequence cut by the end of the buffer */
        if (srcEnd - src < extra)
            break;

        while (extra > 0) {
            unsigned int cont = *src++;

            if ((cont & 0xC0) != 0x80) {
                malformed = 1;
                break;
            }
            cp = (cp << 6) | (cont & 0x3F);
            extra--;
        }
        if (malformed)
            break;

        if (cp > 0xFF) {
            /* no chance for this in IsoLat1 */
            malformed = 1;
            break;
        }
        if (dst >= dstEnd)
            break;
        *dst++ = (unsigned char) cp;
        committed = src;
    }

    *outlen = dst - out;
    *inlen = committed - in;
    return(malformed ? -2 : *outlen);
}
471
#endif /* LIBXML_OUTPUT_ENABLED */
472
473
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is decoded byte by byte, so the
 * result depends neither on the byte order of the host nor on the
 * alignment of @inb.
 *
 * An odd trailing byte is ignored. A high surrogate that is the last
 * code unit of the input is left unconsumed so that the caller can
 * supply its second half later. Conversion also stops as soon as fewer
 * than six bytes of the *@outlen budget remain.
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 *     (a high surrogate is not followed by a low surrogate).
 *     On return *@outlen is the number of bytes written and *@inlenb the
 *     number of input bytes consumed.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* processed = inb;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int bits;

    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + *inlenb;

    /*
     * The "+ 5" guard leaves at least six free bytes on every iteration,
     * more than the four a single character can need, so no further
     * output bound check is required inside the loop.
     */
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
        /* little endian: low byte first */
        c = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
        in += 2;

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend) {
                /* pair split across chunks: wait for the second half */
                break;
            }
            d = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
            c &= 0x03FF;
            c <<= 10;
            c |= d & 0x03FF;
            c += 0x10000;
        }

        /* c now holds one code point: emit lead byte, then 6 bit groups */
        if      (c <    0x80) {  *out++=  c;                bits= -6; }
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }

        for ( ; bits >= 0; bits-= 6)
            *out++= ((c >> bits) & 0x3F) | 0x80;

        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(*outlen);
}
560
561
#ifdef LIBXML_OUTPUT_ENABLED
562
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. No byte order mark is written. The output is
 * stored byte by byte, so the result depends neither on the byte order
 * of the host nor on the alignment of @outb.
 *
 * A NULL @in is an initialization call: both lengths are set to 0.
 * An incomplete sequence at the end of @in, or a lack of output space,
 * stops the conversion without error.
 *
 * Returns the number of bytes written, -2 if the input is malformed, or
 *     -1 if @outb, @outlen or @inlen is NULL.
 *     On return (other than -1) *@outlen is the number of bytes written
 *     and *@inlen the number of input bytes fully converted.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    const unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16 bit units can be stored */
    outend = outb + (*outlen / 2) * 2;

    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c= d; trailing= 0; }
        else if (d < 0xC0)  goto invalid;  /* trailing byte in leading position */
        else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
        else                goto invalid;  /* no chance for this in UTF-16 */

        /* sequence cut by the end of the buffer: wait for more input */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /*
             * A non-continuation byte makes the sequence malformed. Fail
             * instead of emitting a half-built code point and swallowing
             * the offending byte.
             */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* c now holds one decoded code point */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            out[0] = (unsigned char) (c & 0xFF);
            out[1] = (unsigned char) (c >> 8);
            out += 2;
        }
        else if (c < 0x110000) {
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            out[0] = (unsigned char) (hi & 0xFF);
            out[1] = (unsigned char) (hi >> 8);
            out[2] = (unsigned char) (lo & 0xFF);
            out[3] = (unsigned char) (lo >> 8);
            out += 4;
        }
        else {
            /* above U+10FFFF: stop here without consuming the sequence */
            break;
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
670
671
/**
 * UTF8ToUTF16:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
 * block of chars out.
 *
 * A NULL @in is an initialization call: the two bytes 0xFF 0xFE are
 * written to @outb when it has room for them, and *@inlen is set to 0.
 * Any other call is forwarded unchanged to UTF8ToUTF16LE().
 *
 * Returns, for the initialization call, 2 if the mark was written and 0
 *     otherwise; for every other call, whatever UTF8ToUTF16LE() returns.
 */
static int
UTF8ToUTF16(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    if (in != NULL)
        return (UTF8ToUTF16LE(outb, outlen, in, inlen));

    /*
     * initialization, add the Byte Order Mark for UTF-16LE
     */
    if (*outlen < 2) {
        /* no room for the mark */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    outb[0] = 0xFF;
    outb[1] = 0xFE;
    *outlen = 2;
    *inlen = 0;
#ifdef DEBUG_ENCODING
    xmlGenericError(xmlGenericErrorContext,
		"Added FFFE Byte Order Mark\n");
#endif
    return(2);
}
709
#endif /* LIBXML_OUTPUT_ENABLED */
710
711
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is decoded byte by byte, so the
 * result depends neither on the byte order of the host nor on the
 * alignment of @inb.
 *
 * An odd trailing byte is ignored. A high surrogate that is the last
 * code unit of the input is left unconsumed so that the caller can
 * supply its second half later, as UTF16LEToUTF8() does. A character is
 * only emitted when its whole UTF-8 encoding fits in the remaining
 * output space, so the output never ends with a truncated sequence.
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 *     (a high surrogate is not followed by a low surrogate).
 *     On return *@outlen is the number of bytes written and *@inlenb the
 *     number of input bytes consumed.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* processed = inb;
    unsigned char* outend = out + *outlen;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int bits, need;

    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + *inlenb;

    while (in < inend) {
        /* big endian: high byte first */
        c = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
        in += 2;

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend) {
                /* pair split across chunks: wait for the second half */
                break;
            }
            d = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
            c &= 0x03FF;
            c <<= 10;
            c |= d & 0x03FF;
            c += 0x10000;
        }

        /* c now holds one code point: size its UTF-8 form first */
        if      (c <    0x80) need = 1;
        else if (c <   0x800) need = 2;
        else if (c < 0x10000) need = 3;
        else                  need = 4;

        /* not enough room for the whole character: leave it unconsumed */
        if (outend - out < need)
            break;

        if      (need == 1) {  *out++=  c;                bits= -6; }
        else if (need == 2) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
        else if (need == 3) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
        else                {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }

        for ( ; bits >= 0; bits-= 6)
            *out++= ((c >> bits) & 0x3F) | 0x80;

        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(*outlen);
}
802
803
#ifdef LIBXML_OUTPUT_ENABLED
804
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. No byte order mark is written. The output is
 * stored byte by byte, so the result depends neither on the byte order
 * of the host nor on the alignment of @outb.
 *
 * A NULL @in is an initialization call: both lengths are set to 0.
 * An incomplete sequence at the end of @in, or a lack of output space,
 * stops the conversion without error.
 *
 * Returns the number of bytes written, -2 if the input is malformed, or
 *     -1 if @outb, @outlen or @inlen is NULL.
 *     On return (other than -1) *@outlen is the number of bytes written,
 *     on the error path too, and *@inlen the number of input bytes fully
 *     converted.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    const unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16 bit units can be stored */
    outend = outb + (*outlen / 2) * 2;

    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c= d; trailing= 0; }
        else if (d < 0xC0)  goto invalid;  /* trailing byte in leading position */
        else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
        else                goto invalid;  /* no chance for this in UTF-16 */

        /* sequence cut by the end of the buffer: wait for more input */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /*
             * A non-continuation byte makes the sequence malformed. Fail
             * instead of emitting a half-built code point and swallowing
             * the offending byte.
             */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* c now holds one decoded code point */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            out[0] = (unsigned char) (c >> 8);
            out[1] = (unsigned char) (c & 0xFF);
            out += 2;
        }
        else if (c < 0x110000) {
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            out[0] = (unsigned char) (hi >> 8);
            out[1] = (unsigned char) (hi & 0xFF);
            out[2] = (unsigned char) (lo >> 8);
            out[3] = (unsigned char) (lo & 0xFF);
            out += 4;
        }
        else {
            /* above U+10FFFF: stop here without consuming the sequence */
            break;
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    /* byte count, in line with the success path and with UTF8ToUTF16LE */
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
909
#endif /* LIBXML_OUTPUT_ENABLED */
910
911
/************************************************************************
912
 *                  *
913
 *    Generic encoding handling routines      *
914
 *                  *
915
 ************************************************************************/
916
917
/**
918
 * xmlDetectCharEncoding:
919
 * @in:  a pointer to the first bytes of the XML entity, must be at least
920
 *       2 bytes long (at least 4 if encoding is UTF4 variant).
921
 * @len:  pointer to the length of the buffer
922
 *
923
 * Guess the encoding of the entity using the first bytes of the entity content
924
 * according to the non-normative appendix F of the XML-1.0 recommendation.
925
 *
926
 * Returns one of the XML_CHAR_ENCODING_... values.
927
 */
928
xmlCharEncoding
929
xmlDetectCharEncoding(const unsigned char* in, int len)
930
234k
{
931
234k
    if (in == NULL)
932
0
        return(XML_CHAR_ENCODING_NONE);
933
234k
    if (len >= 4) {
934
234k
  if ((in[0] == 0x00) && (in[1] == 0x00) &&
935
323
      (in[2] == 0x00) && (in[3] == 0x3C))
936
149
      return(XML_CHAR_ENCODING_UCS4BE);
937
234k
  if ((in[0] == 0x3C) && (in[1] == 0x00) &&
938
995
      (in[2] == 0x00) && (in[3] == 0x00))
939
24
      return(XML_CHAR_ENCODING_UCS4LE);
940
234k
  if ((in[0] == 0x00) && (in[1] == 0x00) &&
941
174
      (in[2] == 0x3C) && (in[3] == 0x00))
942
10
      return(XML_CHAR_ENCODING_UCS4_2143);
943
234k
  if ((in[0] == 0x00) && (in[1] == 0x3C) &&
944
624
      (in[2] == 0x00) && (in[3] == 0x00))
945
8
      return(XML_CHAR_ENCODING_UCS4_3412);
946
234k
  if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
947
271
      (in[2] == 0xA7) && (in[3] == 0x94))
948
213
      return(XML_CHAR_ENCODING_EBCDIC);
949
234k
  if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
950
90.0k
      (in[2] == 0x78) && (in[3] == 0x6D))
951
83.4k
      return(XML_CHAR_ENCODING_UTF8);
952
  /*
953
   * Although not part of the recommendation, we also
954
   * attempt an "auto-recognition" of UTF-16LE and
955
   * UTF-16BE encodings.
956
   */
957
150k
  if ((in[0] == 0x3C) && (in[1] == 0x00) &&
958
971
      (in[2] == 0x3F) && (in[3] == 0x00))
959
912
      return(XML_CHAR_ENCODING_UTF16LE);
960
149k
  if ((in[0] == 0x00) && (in[1] == 0x3C) &&
961
616
      (in[2] == 0x00) && (in[3] == 0x3F))
962
533
      return(XML_CHAR_ENCODING_UTF16BE);
963
149k
    }
964
149k
    if (len >= 3) {
965
  /*
966
   * Errata on XML-1.0 June 20 2001
967
   * We now allow an UTF8 encoded BOM
968
   */
969
149k
  if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
970
1.99k
      (in[2] == 0xBF))
971
1.97k
      return(XML_CHAR_ENCODING_UTF8);
972
149k
    }
973
    /* For UTF-16 we can recognize by the BOM */
974
147k
    if (len >= 2) {
975
147k
  if ((in[0] == 0xFE) && (in[1] == 0xFF))
976
1.17k
      return(XML_CHAR_ENCODING_UTF16BE);
977
146k
  if ((in[0] == 0xFF) && (in[1] == 0xFE))
978
1.23k
      return(XML_CHAR_ENCODING_UTF16LE);
979
146k
    }
980
145k
    return(XML_CHAR_ENCODING_NONE);
981
147k
}
982
983
/**
984
 * xmlCleanupEncodingAliases:
985
 *
986
 * Unregisters all aliases
987
 */
988
void
989
0
xmlCleanupEncodingAliases(void) {
990
0
    int i;
991
992
0
    if (xmlCharEncodingAliases == NULL)
993
0
  return;
994
995
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
996
0
  if (xmlCharEncodingAliases[i].name != NULL)
997
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
998
0
  if (xmlCharEncodingAliases[i].alias != NULL)
999
0
      xmlFree((char *) xmlCharEncodingAliases[i].alias);
1000
0
    }
1001
0
    xmlCharEncodingAliasesNb = 0;
1002
0
    xmlCharEncodingAliasesMax = 0;
1003
0
    xmlFree(xmlCharEncodingAliases);
1004
0
    xmlCharEncodingAliases = NULL;
1005
0
}
1006
1007
/**
1008
 * xmlGetEncodingAlias:
1009
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1010
 *
1011
 * Lookup an encoding name for the given alias.
1012
 *
1013
 * Returns NULL if not found, otherwise the original name
1014
 */
1015
const char *
1016
36.3k
xmlGetEncodingAlias(const char *alias) {
1017
36.3k
    int i;
1018
36.3k
    char upper[100];
1019
1020
36.3k
    if (alias == NULL)
1021
0
  return(NULL);
1022
1023
36.3k
    if (xmlCharEncodingAliases == NULL)
1024
36.3k
  return(NULL);
1025
1026
0
    for (i = 0;i < 99;i++) {
1027
0
        upper[i] = toupper(alias[i]);
1028
0
  if (upper[i] == 0) break;
1029
0
    }
1030
0
    upper[i] = 0;
1031
1032
    /*
1033
     * Walk down the list looking for a definition of the alias
1034
     */
1035
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1036
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1037
0
      return(xmlCharEncodingAliases[i].name);
1038
0
  }
1039
0
    }
1040
0
    return(NULL);
1041
0
}
1042
1043
/**
1044
 * xmlAddEncodingAlias:
1045
 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1046
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1047
 *
1048
 * Registers an alias @alias for an encoding named @name. Existing alias
1049
 * will be overwritten.
1050
 *
1051
 * Returns 0 in case of success, -1 in case of error
1052
 */
1053
int
1054
0
xmlAddEncodingAlias(const char *name, const char *alias) {
1055
0
    int i;
1056
0
    char upper[100];
1057
1058
0
    if ((name == NULL) || (alias == NULL))
1059
0
  return(-1);
1060
1061
0
    for (i = 0;i < 99;i++) {
1062
0
        upper[i] = toupper(alias[i]);
1063
0
  if (upper[i] == 0) break;
1064
0
    }
1065
0
    upper[i] = 0;
1066
1067
0
    if (xmlCharEncodingAliases == NULL) {
1068
0
  xmlCharEncodingAliasesNb = 0;
1069
0
  xmlCharEncodingAliasesMax = 20;
1070
0
  xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1071
0
        xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1072
0
  if (xmlCharEncodingAliases == NULL)
1073
0
      return(-1);
1074
0
    } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1075
0
  xmlCharEncodingAliasesMax *= 2;
1076
0
  xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1077
0
        xmlRealloc(xmlCharEncodingAliases,
1078
0
             xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1079
0
    }
1080
    /*
1081
     * Walk down the list looking for a definition of the alias
1082
     */
1083
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1084
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1085
      /*
1086
       * Replace the definition.
1087
       */
1088
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1089
0
      xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1090
0
      return(0);
1091
0
  }
1092
0
    }
1093
    /*
1094
     * Add the definition
1095
     */
1096
0
    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1097
0
    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1098
0
    xmlCharEncodingAliasesNb++;
1099
0
    return(0);
1100
0
}
1101
1102
/**
1103
 * xmlDelEncodingAlias:
1104
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1105
 *
1106
 * Unregisters an encoding alias @alias
1107
 *
1108
 * Returns 0 in case of success, -1 in case of error
1109
 */
1110
int
1111
0
xmlDelEncodingAlias(const char *alias) {
1112
0
    int i;
1113
1114
0
    if (alias == NULL)
1115
0
  return(-1);
1116
1117
0
    if (xmlCharEncodingAliases == NULL)
1118
0
  return(-1);
1119
    /*
1120
     * Walk down the list looking for a definition of the alias
1121
     */
1122
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1123
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1124
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1125
0
      xmlFree((char *) xmlCharEncodingAliases[i].alias);
1126
0
      xmlCharEncodingAliasesNb--;
1127
0
      memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1128
0
        sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1129
0
      return(0);
1130
0
  }
1131
0
    }
1132
0
    return(-1);
1133
0
}
1134
1135
/**
1136
 * xmlParseCharEncoding:
1137
 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1138
 *
1139
 * Compare the string to the encoding schemes already known. Note
1140
 * that the comparison is case insensitive accordingly to the section
1141
 * [XML] 4.3.3 Character Encoding in Entities.
1142
 *
1143
 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1144
 * if not recognized.
1145
 */
1146
xmlCharEncoding
1147
xmlParseCharEncoding(const char* name)
1148
3.70k
{
1149
3.70k
    const char *alias;
1150
3.70k
    char upper[500];
1151
3.70k
    int i;
1152
1153
3.70k
    if (name == NULL)
1154
0
  return(XML_CHAR_ENCODING_NONE);
1155
1156
    /*
1157
     * Do the alias resolution
1158
     */
1159
3.70k
    alias = xmlGetEncodingAlias(name);
1160
3.70k
    if (alias != NULL)
1161
0
  name = alias;
1162
1163
54.5k
    for (i = 0;i < 499;i++) {
1164
54.5k
        upper[i] = toupper(name[i]);
1165
54.5k
  if (upper[i] == 0) break;
1166
54.5k
    }
1167
3.70k
    upper[i] = 0;
1168
1169
3.70k
    if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1170
3.70k
    if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1171
3.70k
    if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1172
1173
    /*
1174
     * NOTE: if we were able to parse this, the endianness of UTF16 is
1175
     *       already found and in use
1176
     */
1177
3.70k
    if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1178
3.70k
    if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1179
1180
3.70k
    if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1181
3.68k
    if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1182
3.68k
    if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1183
1184
    /*
1185
     * NOTE: if we were able to parse this, the endianness of UCS4 is
1186
     *       already found and in use
1187
     */
1188
3.68k
    if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1189
3.33k
    if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1190
3.33k
    if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1191
1192
1193
3.33k
    if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1194
3.33k
    if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1195
3.29k
    if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1196
1197
3.29k
    if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1198
3.29k
    if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1199
3.28k
    if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1200
1201
3.28k
    if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1202
3.28k
    if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1203
3.28k
    if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1204
3.28k
    if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1205
3.28k
    if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1206
3.28k
    if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1207
3.28k
    if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1208
1209
3.28k
    if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1210
3.28k
    if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1211
3.28k
    if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1212
1213
#ifdef DEBUG_ENCODING
1214
    xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1215
#endif
1216
3.28k
    return(XML_CHAR_ENCODING_ERROR);
1217
3.28k
}
1218
1219
/**
1220
 * xmlGetCharEncodingName:
1221
 * @enc:  the encoding
1222
 *
1223
 * The "canonical" name for XML encoding.
1224
 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1225
 * Section 4.3.3  Character Encoding in Entities
1226
 *
1227
 * Returns the canonical name for the given encoding
1228
 */
1229
1230
const char*
1231
416
xmlGetCharEncodingName(xmlCharEncoding enc) {
1232
416
    switch (enc) {
1233
0
        case XML_CHAR_ENCODING_ERROR:
1234
0
      return(NULL);
1235
0
        case XML_CHAR_ENCODING_NONE:
1236
0
      return(NULL);
1237
0
        case XML_CHAR_ENCODING_UTF8:
1238
0
      return("UTF-8");
1239
0
        case XML_CHAR_ENCODING_UTF16LE:
1240
0
      return("UTF-16");
1241
0
        case XML_CHAR_ENCODING_UTF16BE:
1242
0
      return("UTF-16");
1243
0
        case XML_CHAR_ENCODING_EBCDIC:
1244
0
            return("EBCDIC");
1245
352
        case XML_CHAR_ENCODING_UCS4LE:
1246
352
            return("ISO-10646-UCS-4");
1247
0
        case XML_CHAR_ENCODING_UCS4BE:
1248
0
            return("ISO-10646-UCS-4");
1249
0
        case XML_CHAR_ENCODING_UCS4_2143:
1250
0
            return("ISO-10646-UCS-4");
1251
0
        case XML_CHAR_ENCODING_UCS4_3412:
1252
0
            return("ISO-10646-UCS-4");
1253
16
        case XML_CHAR_ENCODING_UCS2:
1254
16
            return("ISO-10646-UCS-2");
1255
40
        case XML_CHAR_ENCODING_8859_1:
1256
40
      return("ISO-8859-1");
1257
8
        case XML_CHAR_ENCODING_8859_2:
1258
8
      return("ISO-8859-2");
1259
0
        case XML_CHAR_ENCODING_8859_3:
1260
0
      return("ISO-8859-3");
1261
0
        case XML_CHAR_ENCODING_8859_4:
1262
0
      return("ISO-8859-4");
1263
0
        case XML_CHAR_ENCODING_8859_5:
1264
0
      return("ISO-8859-5");
1265
0
        case XML_CHAR_ENCODING_8859_6:
1266
0
      return("ISO-8859-6");
1267
0
        case XML_CHAR_ENCODING_8859_7:
1268
0
      return("ISO-8859-7");
1269
0
        case XML_CHAR_ENCODING_8859_8:
1270
0
      return("ISO-8859-8");
1271
0
        case XML_CHAR_ENCODING_8859_9:
1272
0
      return("ISO-8859-9");
1273
0
        case XML_CHAR_ENCODING_2022_JP:
1274
0
            return("ISO-2022-JP");
1275
0
        case XML_CHAR_ENCODING_SHIFT_JIS:
1276
0
            return("Shift-JIS");
1277
0
        case XML_CHAR_ENCODING_EUC_JP:
1278
0
            return("EUC-JP");
1279
0
  case XML_CHAR_ENCODING_ASCII:
1280
0
      return(NULL);
1281
416
    }
1282
0
    return(NULL);
1283
416
}
1284
1285
/************************************************************************
1286
 *                  *
1287
 *      Char encoding handlers        *
1288
 *                  *
1289
 ************************************************************************/
1290
1291
1292
/*
 * Fixed capacity of the handler table below; the size should be growable,
 * but it's not a big deal ... xmlRegisterCharEncodingHandler() refuses
 * further registrations once nbCharEncodingHandler reaches this limit.
 */
1293
63
#define MAX_ENCODING_HANDLERS 50
1294
static xmlCharEncodingHandlerPtr *handlers = NULL;
1295
static int nbCharEncodingHandler = 0;
1296
1297
/*
1298
 * The default is UTF-8 for XML, that's also the default used for the
1299
 * parser internals, so the default encoding handler is NULL
1300
 */
1301
1302
static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1303
1304
/**
1305
 * xmlNewCharEncodingHandler:
1306
 * @name:  the encoding name, in UTF-8 format (ASCII actually)
1307
 * @input:  the xmlCharEncodingInputFunc to read that encoding
1308
 * @output:  the xmlCharEncodingOutputFunc to write that encoding
1309
 *
1310
 * Create and registers an xmlCharEncodingHandler.
1311
 *
1312
 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1313
 */
1314
xmlCharEncodingHandlerPtr
1315
xmlNewCharEncodingHandler(const char *name,
1316
                          xmlCharEncodingInputFunc input,
1317
56
                          xmlCharEncodingOutputFunc output) {
1318
56
    xmlCharEncodingHandlerPtr handler;
1319
56
    const char *alias;
1320
56
    char upper[500];
1321
56
    int i;
1322
56
    char *up = NULL;
1323
1324
    /*
1325
     * Do the alias resolution
1326
     */
1327
56
    alias = xmlGetEncodingAlias(name);
1328
56
    if (alias != NULL)
1329
0
  name = alias;
1330
1331
    /*
1332
     * Keep only the uppercase version of the encoding.
1333
     */
1334
56
    if (name == NULL) {
1335
0
        xmlEncodingErr(XML_I18N_NO_NAME,
1336
0
           "xmlNewCharEncodingHandler : no name !\n", NULL);
1337
0
  return(NULL);
1338
0
    }
1339
434
    for (i = 0;i < 499;i++) {
1340
434
        upper[i] = toupper(name[i]);
1341
434
  if (upper[i] == 0) break;
1342
434
    }
1343
56
    upper[i] = 0;
1344
56
    up = xmlMemStrdup(upper);
1345
56
    if (up == NULL) {
1346
0
        xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1347
0
  return(NULL);
1348
0
    }
1349
1350
    /*
1351
     * allocate and fill-up an handler block.
1352
     */
1353
56
    handler = (xmlCharEncodingHandlerPtr)
1354
56
              xmlMalloc(sizeof(xmlCharEncodingHandler));
1355
56
    if (handler == NULL) {
1356
0
        xmlFree(up);
1357
0
        xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1358
0
  return(NULL);
1359
0
    }
1360
56
    memset(handler, 0, sizeof(xmlCharEncodingHandler));
1361
56
    handler->input = input;
1362
56
    handler->output = output;
1363
56
    handler->name = up;
1364
1365
56
#ifdef LIBXML_ICONV_ENABLED
1366
56
    handler->iconv_in = NULL;
1367
56
    handler->iconv_out = NULL;
1368
56
#endif
1369
#ifdef LIBXML_ICU_ENABLED
1370
    handler->uconv_in = NULL;
1371
    handler->uconv_out = NULL;
1372
#endif
1373
1374
    /*
1375
     * registers and returns the handler.
1376
     */
1377
56
    xmlRegisterCharEncodingHandler(handler);
1378
#ifdef DEBUG_ENCODING
1379
    xmlGenericError(xmlGenericErrorContext,
1380
      "Registered encoding handler for %s\n", name);
1381
#endif
1382
56
    return(handler);
1383
56
}
1384
1385
/**
1386
 * xmlInitCharEncodingHandlers:
1387
 *
1388
 * Initialize the char encoding support, it registers the default
1389
 * encoding supported.
1390
 * NOTE: while public, this function usually doesn't need to be called
1391
 *       in normal processing.
1392
 */
1393
void
1394
7
xmlInitCharEncodingHandlers(void) {
1395
7
    unsigned short int tst = 0x1234;
1396
7
    unsigned char *ptr = (unsigned char *) &tst;
1397
1398
7
    if (handlers != NULL) return;
1399
1400
7
    handlers = (xmlCharEncodingHandlerPtr *)
1401
7
        xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1402
1403
7
    if (*ptr == 0x12) xmlLittleEndian = 0;
1404
7
    else if (*ptr == 0x34) xmlLittleEndian = 1;
1405
0
    else {
1406
0
        xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1407
0
                 "Odd problem at endianness detection\n", NULL);
1408
0
    }
1409
1410
7
    if (handlers == NULL) {
1411
0
        xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
1412
0
  return;
1413
0
    }
1414
7
    xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
1415
7
#ifdef LIBXML_OUTPUT_ENABLED
1416
7
    xmlUTF16LEHandler =
1417
7
          xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1418
7
    xmlUTF16BEHandler =
1419
7
          xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
1420
7
    xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
1421
7
    xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1422
7
    xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
1423
7
    xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
1424
7
#ifdef LIBXML_HTML_ENABLED
1425
7
    xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1426
7
#endif
1427
#else
1428
    xmlUTF16LEHandler =
1429
          xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1430
    xmlUTF16BEHandler =
1431
          xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
1432
    xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
1433
    xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1434
    xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1435
    xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1436
#endif /* LIBXML_OUTPUT_ENABLED */
1437
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
1438
#ifdef LIBXML_ISO8859X_ENABLED
1439
    xmlRegisterCharEncodingHandlersISO8859x ();
1440
#endif
1441
#endif
1442
1443
7
}
1444
1445
/**
1446
 * xmlCleanupCharEncodingHandlers:
1447
 *
1448
 * Cleanup the memory allocated for the char encoding support, it
1449
 * unregisters all the encoding handlers and the aliases.
1450
 */
1451
void
1452
0
xmlCleanupCharEncodingHandlers(void) {
1453
0
    xmlCleanupEncodingAliases();
1454
1455
0
    if (handlers == NULL) return;
1456
1457
0
    for (;nbCharEncodingHandler > 0;) {
1458
0
        nbCharEncodingHandler--;
1459
0
  if (handlers[nbCharEncodingHandler] != NULL) {
1460
0
      if (handlers[nbCharEncodingHandler]->name != NULL)
1461
0
    xmlFree(handlers[nbCharEncodingHandler]->name);
1462
0
      xmlFree(handlers[nbCharEncodingHandler]);
1463
0
  }
1464
0
    }
1465
0
    xmlFree(handlers);
1466
0
    handlers = NULL;
1467
0
    nbCharEncodingHandler = 0;
1468
0
    xmlDefaultCharEncodingHandler = NULL;
1469
0
}
1470
1471
/**
1472
 * xmlRegisterCharEncodingHandler:
1473
 * @handler:  the xmlCharEncodingHandlerPtr handler block
1474
 *
1475
 * Register the char encoding handler, surprising, isn't it ?
1476
 */
1477
void
1478
56
xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1479
56
    if (handlers == NULL) xmlInitCharEncodingHandlers();
1480
56
    if ((handler == NULL) || (handlers == NULL)) {
1481
0
        xmlEncodingErr(XML_I18N_NO_HANDLER,
1482
0
    "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
1483
0
  return;
1484
0
    }
1485
1486
56
    if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1487
0
        xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1488
0
  "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1489
0
                 "MAX_ENCODING_HANDLERS");
1490
0
  return;
1491
0
    }
1492
56
    handlers[nbCharEncodingHandler++] = handler;
1493
56
}
1494
1495
/**
1496
 * xmlGetCharEncodingHandler:
1497
 * @enc:  an xmlCharEncoding value.
1498
 *
1499
 * Search in the registered set the handler able to read/write that encoding.
1500
 *
1501
 * Returns the handler or NULL if not found
1502
 */
1503
xmlCharEncodingHandlerPtr
1504
674k
xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1505
674k
    xmlCharEncodingHandlerPtr handler;
1506
1507
674k
    if (handlers == NULL) xmlInitCharEncodingHandlers();
1508
674k
    switch (enc) {
1509
0
        case XML_CHAR_ENCODING_ERROR:
1510
0
      return(NULL);
1511
580k
        case XML_CHAR_ENCODING_NONE:
1512
580k
      return(NULL);
1513
85.4k
        case XML_CHAR_ENCODING_UTF8:
1514
85.4k
      return(NULL);
1515
4.29k
        case XML_CHAR_ENCODING_UTF16LE:
1516
4.29k
      return(xmlUTF16LEHandler);
1517
3.40k
        case XML_CHAR_ENCODING_UTF16BE:
1518
3.40k
      return(xmlUTF16BEHandler);
1519
426
        case XML_CHAR_ENCODING_EBCDIC:
1520
426
            handler = xmlFindCharEncodingHandler("EBCDIC");
1521
426
            if (handler != NULL) return(handler);
1522
426
            handler = xmlFindCharEncodingHandler("ebcdic");
1523
426
            if (handler != NULL) return(handler);
1524
426
            handler = xmlFindCharEncodingHandler("EBCDIC-US");
1525
426
            if (handler != NULL) return(handler);
1526
0
            handler = xmlFindCharEncodingHandler("IBM-037");
1527
0
            if (handler != NULL) return(handler);
1528
0
      break;
1529
298
        case XML_CHAR_ENCODING_UCS4BE:
1530
298
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1531
298
            if (handler != NULL) return(handler);
1532
298
            handler = xmlFindCharEncodingHandler("UCS-4");
1533
298
            if (handler != NULL) return(handler);
1534
0
            handler = xmlFindCharEncodingHandler("UCS4");
1535
0
            if (handler != NULL) return(handler);
1536
0
      break;
1537
48
        case XML_CHAR_ENCODING_UCS4LE:
1538
48
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1539
48
            if (handler != NULL) return(handler);
1540
48
            handler = xmlFindCharEncodingHandler("UCS-4");
1541
48
            if (handler != NULL) return(handler);
1542
0
            handler = xmlFindCharEncodingHandler("UCS4");
1543
0
            if (handler != NULL) return(handler);
1544
0
      break;
1545
20
        case XML_CHAR_ENCODING_UCS4_2143:
1546
20
      break;
1547
16
        case XML_CHAR_ENCODING_UCS4_3412:
1548
16
      break;
1549
0
        case XML_CHAR_ENCODING_UCS2:
1550
0
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1551
0
            if (handler != NULL) return(handler);
1552
0
            handler = xmlFindCharEncodingHandler("UCS-2");
1553
0
            if (handler != NULL) return(handler);
1554
0
            handler = xmlFindCharEncodingHandler("UCS2");
1555
0
            if (handler != NULL) return(handler);
1556
0
      break;
1557
1558
      /*
1559
       * We used to keep ISO Latin encodings native in the
1560
       * generated data. This led to so many problems that
1561
       * this has been removed. One can still change this
1562
       * back by registering no-ops encoders for those
1563
       */
1564
0
        case XML_CHAR_ENCODING_8859_1:
1565
0
      handler = xmlFindCharEncodingHandler("ISO-8859-1");
1566
0
      if (handler != NULL) return(handler);
1567
0
      break;
1568
0
        case XML_CHAR_ENCODING_8859_2:
1569
0
      handler = xmlFindCharEncodingHandler("ISO-8859-2");
1570
0
      if (handler != NULL) return(handler);
1571
0
      break;
1572
0
        case XML_CHAR_ENCODING_8859_3:
1573
0
      handler = xmlFindCharEncodingHandler("ISO-8859-3");
1574
0
      if (handler != NULL) return(handler);
1575
0
      break;
1576
0
        case XML_CHAR_ENCODING_8859_4:
1577
0
      handler = xmlFindCharEncodingHandler("ISO-8859-4");
1578
0
      if (handler != NULL) return(handler);
1579
0
      break;
1580
0
        case XML_CHAR_ENCODING_8859_5:
1581
0
      handler = xmlFindCharEncodingHandler("ISO-8859-5");
1582
0
      if (handler != NULL) return(handler);
1583
0
      break;
1584
0
        case XML_CHAR_ENCODING_8859_6:
1585
0
      handler = xmlFindCharEncodingHandler("ISO-8859-6");
1586
0
      if (handler != NULL) return(handler);
1587
0
      break;
1588
0
        case XML_CHAR_ENCODING_8859_7:
1589
0
      handler = xmlFindCharEncodingHandler("ISO-8859-7");
1590
0
      if (handler != NULL) return(handler);
1591
0
      break;
1592
0
        case XML_CHAR_ENCODING_8859_8:
1593
0
      handler = xmlFindCharEncodingHandler("ISO-8859-8");
1594
0
      if (handler != NULL) return(handler);
1595
0
      break;
1596
0
        case XML_CHAR_ENCODING_8859_9:
1597
0
      handler = xmlFindCharEncodingHandler("ISO-8859-9");
1598
0
      if (handler != NULL) return(handler);
1599
0
      break;
1600
1601
1602
0
        case XML_CHAR_ENCODING_2022_JP:
1603
0
            handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1604
0
            if (handler != NULL) return(handler);
1605
0
      break;
1606
0
        case XML_CHAR_ENCODING_SHIFT_JIS:
1607
0
            handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1608
0
            if (handler != NULL) return(handler);
1609
0
            handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1610
0
            if (handler != NULL) return(handler);
1611
0
            handler = xmlFindCharEncodingHandler("Shift_JIS");
1612
0
            if (handler != NULL) return(handler);
1613
0
      break;
1614
0
        case XML_CHAR_ENCODING_EUC_JP:
1615
0
            handler = xmlFindCharEncodingHandler("EUC-JP");
1616
0
            if (handler != NULL) return(handler);
1617
0
      break;
1618
0
  default:
1619
0
      break;
1620
674k
    }
1621
1622
#ifdef DEBUG_ENCODING
1623
    xmlGenericError(xmlGenericErrorContext,
1624
      "No handler found for encoding %d\n", enc);
1625
#endif
1626
36
    return(NULL);
1627
674k
}
1628
1629
/**
1630
 * xmlFindCharEncodingHandler:
1631
 * @name:  a string describing the char encoding.
1632
 *
1633
 * Search in the registered set the handler able to read/write that encoding.
1634
 *
1635
 * Returns the handler or NULL if not found
1636
 */
1637
xmlCharEncodingHandlerPtr
1638
32.5k
xmlFindCharEncodingHandler(const char *name) {
1639
32.5k
    const char *nalias;
1640
32.5k
    const char *norig;
1641
32.5k
    xmlCharEncoding alias;
1642
32.5k
#ifdef LIBXML_ICONV_ENABLED
1643
32.5k
    xmlCharEncodingHandlerPtr enc;
1644
32.5k
    iconv_t icv_in, icv_out;
1645
32.5k
#endif /* LIBXML_ICONV_ENABLED */
1646
#ifdef LIBXML_ICU_ENABLED
1647
    xmlCharEncodingHandlerPtr encu;
1648
    uconv_t *ucv_in, *ucv_out;
1649
#endif /* LIBXML_ICU_ENABLED */
1650
32.5k
    char upper[100];
1651
32.5k
    int i;
1652
1653
32.5k
    if (handlers == NULL) xmlInitCharEncodingHandlers();
1654
32.5k
    if (name == NULL) return(xmlDefaultCharEncodingHandler);
1655
32.5k
    if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1656
1657
    /*
1658
     * Do the alias resolution
1659
     */
1660
32.5k
    norig = name;
1661
32.5k
    nalias = xmlGetEncodingAlias(name);
1662
32.5k
    if (nalias != NULL)
1663
0
  name = nalias;
1664
1665
    /*
1666
     * Check first for directly registered encoding names
1667
     */
1668
225k
    for (i = 0;i < 99;i++) {
1669
225k
        upper[i] = toupper(name[i]);
1670
225k
  if (upper[i] == 0) break;
1671
225k
    }
1672
32.5k
    upper[i] = 0;
1673
1674
32.5k
    if (handlers != NULL) {
1675
282k
        for (i = 0;i < nbCharEncodingHandler; i++) {
1676
252k
            if (!strcmp(upper, handlers[i]->name)) {
1677
#ifdef DEBUG_ENCODING
1678
                xmlGenericError(xmlGenericErrorContext,
1679
                        "Found registered handler for encoding %s\n", name);
1680
#endif
1681
2.77k
                return(handlers[i]);
1682
2.77k
            }
1683
252k
        }
1684
32.5k
    }
1685
1686
29.7k
#ifdef LIBXML_ICONV_ENABLED
1687
    /* check whether iconv can handle this */
1688
29.7k
    icv_in = iconv_open("UTF-8", name);
1689
29.7k
    icv_out = iconv_open(name, "UTF-8");
1690
29.7k
    if (icv_in == (iconv_t) -1) {
1691
3.70k
        icv_in = iconv_open("UTF-8", upper);
1692
3.70k
    }
1693
29.7k
    if (icv_out == (iconv_t) -1) {
1694
3.70k
  icv_out = iconv_open(upper, "UTF-8");
1695
3.70k
    }
1696
29.7k
    if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1697
26.0k
      enc = (xmlCharEncodingHandlerPtr)
1698
26.0k
            xmlMalloc(sizeof(xmlCharEncodingHandler));
1699
26.0k
      if (enc == NULL) {
1700
0
          iconv_close(icv_in);
1701
0
          iconv_close(icv_out);
1702
0
    return(NULL);
1703
0
      }
1704
26.0k
            memset(enc, 0, sizeof(xmlCharEncodingHandler));
1705
26.0k
      enc->name = xmlMemStrdup(name);
1706
26.0k
      enc->input = NULL;
1707
26.0k
      enc->output = NULL;
1708
26.0k
      enc->iconv_in = icv_in;
1709
26.0k
      enc->iconv_out = icv_out;
1710
#ifdef DEBUG_ENCODING
1711
            xmlGenericError(xmlGenericErrorContext,
1712
        "Found iconv handler for encoding %s\n", name);
1713
#endif
1714
26.0k
      return enc;
1715
26.0k
    } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1716
0
      xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1717
0
        "iconv : problems with filters for '%s'\n", name);
1718
0
    }
1719
3.70k
#endif /* LIBXML_ICONV_ENABLED */
1720
#ifdef LIBXML_ICU_ENABLED
1721
    /* check whether icu can handle this */
1722
    ucv_in = openIcuConverter(name, 1);
1723
    ucv_out = openIcuConverter(name, 0);
1724
    if (ucv_in != NULL && ucv_out != NULL) {
1725
      encu = (xmlCharEncodingHandlerPtr)
1726
             xmlMalloc(sizeof(xmlCharEncodingHandler));
1727
      if (encu == NULL) {
1728
                closeIcuConverter(ucv_in);
1729
                closeIcuConverter(ucv_out);
1730
    return(NULL);
1731
      }
1732
            memset(encu, 0, sizeof(xmlCharEncodingHandler));
1733
      encu->name = xmlMemStrdup(name);
1734
      encu->input = NULL;
1735
      encu->output = NULL;
1736
      encu->uconv_in = ucv_in;
1737
      encu->uconv_out = ucv_out;
1738
#ifdef DEBUG_ENCODING
1739
            xmlGenericError(xmlGenericErrorContext,
1740
        "Found ICU converter handler for encoding %s\n", name);
1741
#endif
1742
      return encu;
1743
    } else if (ucv_in != NULL || ucv_out != NULL) {
1744
            closeIcuConverter(ucv_in);
1745
            closeIcuConverter(ucv_out);
1746
      xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1747
        "ICU converter : problems with filters for '%s'\n", name);
1748
    }
1749
#endif /* LIBXML_ICU_ENABLED */
1750
1751
#ifdef DEBUG_ENCODING
1752
    xmlGenericError(xmlGenericErrorContext,
1753
      "No handler found for encoding %s\n", name);
1754
#endif
1755
1756
    /*
1757
     * Fallback using the canonical names
1758
     */
1759
3.70k
    alias = xmlParseCharEncoding(norig);
1760
3.70k
    if (alias != XML_CHAR_ENCODING_ERROR) {
1761
416
        const char* canon;
1762
416
        canon = xmlGetCharEncodingName(alias);
1763
416
        if ((canon != NULL) && (strcmp(name, canon))) {
1764
54
      return(xmlFindCharEncodingHandler(canon));
1765
54
        }
1766
416
    }
1767
1768
    /* If "none of the above", give up */
1769
3.64k
    return(NULL);
1770
3.70k
}
1771
1772
/************************************************************************
1773
 *                  *
1774
 *    ICONV based generic conversion functions    *
1775
 *                  *
1776
 ************************************************************************/
1777
1778
#ifdef LIBXML_ICONV_ENABLED
1779
/**
 * xmlIconvWrapper:
 * @cd:   iconv converter data structure
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of bytes in the source encoding of @cd
 * @inlen:  the length of @in
 *
 * Runs one iconv() conversion step from @in to @out.
 *
 * Returns 0 if success, or
 *     -1 by lack of space, or if @out, @outlen, @in or @inlen is NULL
 *        (in which case *@outlen is set to 0 when @outlen is not NULL), or
 *     -2 if the transcoding fails (the input is not valid in the source
 *        encoding or cannot be represented in the target encoding), or
 *     -3 if the input ends with an incomplete sequence, or for any other
 *        iconv failure.
 *
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    size_t icv_inlen, icv_outlen;
    const char *icv_in = (const char *) in;
    char *icv_out = (char *) out;
    size_t ret; /* iconv() returns size_t; failure is (size_t) -1 */

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
        if (outlen != NULL) *outlen = 0;
        return(-1);
    }
    icv_inlen = *inlen;
    icv_outlen = *outlen;
    /*
     * Some iconv() prototypes take "char **" for the input, others
     * "const char **"; going through "void *" is accepted by both.
     */
    ret = iconv(cd, (void *) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
    /* iconv() left the remaining byte counts; turn them into used counts */
    *inlen -= icv_inlen;
    *outlen -= icv_outlen;
    if ((icv_inlen != 0) || (ret == (size_t) -1)) {
#ifdef EILSEQ
        if (errno == EILSEQ) {
            return -2;
        } else
#endif
#ifdef E2BIG
        if (errno == E2BIG) {
            return -1;
        } else
#endif
#ifdef EINVAL
        if (errno == EINVAL) {
            return -3;
        } else
#endif
        {
            return -3;
        }
    }
    return 0;
}
1836
#endif /* LIBXML_ICONV_ENABLED */
1837
1838
/************************************************************************
1839
 *                  *
1840
 *    ICU based generic conversion functions    *
1841
 *                  *
1842
 ************************************************************************/
1843
1844
#ifdef LIBXML_ICU_ENABLED
1845
/**
1846
 * xmlUconvWrapper:
1847
 * @cd: ICU uconverter data structure
1848
 * @toUnicode : non-zero if toUnicode. 0 otherwise.
1849
 * @out:  a pointer to an array of bytes to store the result
1850
 * @outlen:  the length of @out
1851
 * @in:  a pointer to an array of ISO Latin 1 chars
1852
 * @inlen:  the length of @in
1853
 *
1854
 * Returns 0 if success, or
1855
 *     -1 by lack of space, or
1856
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
1857
 *        the result of transformation can't fit into the encoding we want), or
1858
 *     -3 if there the last byte can't form a single output char.
1859
 *
1860
 * The value of @inlen after return is the number of octets consumed
1861
 *     as the return value is positive, else unpredictable.
1862
 * The value of @outlen after return is the number of ocetes consumed.
1863
 */
1864
static int
1865
xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
1866
                const unsigned char *in, int *inlen) {
1867
    const char *ucv_in = (const char *) in;
1868
    char *ucv_out = (char *) out;
1869
    UErrorCode err = U_ZERO_ERROR;
1870
1871
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1872
        if (outlen != NULL) *outlen = 0;
1873
        return(-1);
1874
    }
1875
1876
    /*
1877
     * TODO(jungshik)
1878
     * 1. is ucnv_convert(To|From)Algorithmic better?
1879
     * 2. had we better use an explicit pivot buffer?
1880
     * 3. error returned comes from 'fromUnicode' only even
1881
     *    when toUnicode is true !
1882
     */
1883
    if (toUnicode) {
1884
        /* encoding => UTF-16 => UTF-8 */
1885
        ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
1886
                       &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
1887
                       0, TRUE, &err);
1888
    } else {
1889
        /* UTF-8 => UTF-16 => encoding */
1890
        ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
1891
                       &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
1892
                       0, TRUE, &err);
1893
    }
1894
    *inlen = ucv_in - (const char*) in;
1895
    *outlen = ucv_out - (char *) out;
1896
    if (U_SUCCESS(err))
1897
        return 0;
1898
    if (err == U_BUFFER_OVERFLOW_ERROR)
1899
        return -1;
1900
    if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
1901
        return -2;
1902
    /* if (err == U_TRUNCATED_CHAR_FOUND) */
1903
    return -3;
1904
}
1905
#endif /* LIBXML_ICU_ENABLED */
1906
1907
/************************************************************************
1908
 *                  *
1909
 *    The real API used by libxml for on-the-fly conversion *
1910
 *                  *
1911
 ************************************************************************/
1912
1913
static int
1914
xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
1915
7.62M
                 int *outlen, const unsigned char *in, int *inlen) {
1916
7.62M
    int ret;
1917
1918
7.62M
    if (handler->input != NULL) {
1919
5.71M
        ret = handler->input(out, outlen, in, inlen);
1920
5.71M
    }
1921
1.90M
#ifdef LIBXML_ICONV_ENABLED
1922
1.90M
    else if (handler->iconv_in != NULL) {
1923
1.90M
        ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
1924
1.90M
    }
1925
7
#endif /* LIBXML_ICONV_ENABLED */
1926
#ifdef LIBXML_ICU_ENABLED
1927
    else if (handler->uconv_in != NULL) {
1928
        ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen);
1929
    }
1930
#endif /* LIBXML_ICU_ENABLED */
1931
7
    else {
1932
7
        *outlen = 0;
1933
7
        *inlen = 0;
1934
7
        ret = -2;
1935
7
    }
1936
1937
7.62M
    return(ret);
1938
7.62M
}
1939
1940
/* Returns -4 if no output function was found. */
1941
static int
1942
xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
1943
0
                  int *outlen, const unsigned char *in, int *inlen) {
1944
0
    int ret;
1945
1946
0
    if (handler->output != NULL) {
1947
0
        ret = handler->output(out, outlen, in, inlen);
1948
0
    }
1949
0
#ifdef LIBXML_ICONV_ENABLED
1950
0
    else if (handler->iconv_out != NULL) {
1951
0
        ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
1952
0
    }
1953
0
#endif /* LIBXML_ICONV_ENABLED */
1954
#ifdef LIBXML_ICU_ENABLED
1955
    else if (handler->uconv_out != NULL) {
1956
        ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen);
1957
    }
1958
#endif /* LIBXML_ICU_ENABLED */
1959
0
    else {
1960
0
        *outlen = 0;
1961
0
        *inlen = 0;
1962
0
        ret = -4;
1963
0
    }
1964
1965
0
    return(ret);
1966
0
}
1967
1968
/**
1969
 * xmlCharEncFirstLineInt:
1970
 * @handler:  char enconding transformation data structure
1971
 * @out:  an xmlBuffer for the output.
1972
 * @in:  an xmlBuffer for the input
1973
 * @len:  number of bytes to convert for the first line, or -1
1974
 *
1975
 * Front-end for the encoding handler input function, but handle only
1976
 * the very first line, i.e. limit itself to 45 chars.
1977
 *
1978
 * Returns the number of byte written if success, or
1979
 *     -1 general error
1980
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
1981
 *        the result of transformation can't fit into the encoding we want), or
1982
 */
1983
int
1984
xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1985
0
                       xmlBufferPtr in, int len) {
1986
0
    int ret;
1987
0
    int written;
1988
0
    int toconv;
1989
1990
0
    if (handler == NULL) return(-1);
1991
0
    if (out == NULL) return(-1);
1992
0
    if (in == NULL) return(-1);
1993
1994
    /* calculate space available */
1995
0
    written = out->size - out->use - 1; /* count '\0' */
1996
0
    toconv = in->use;
1997
    /*
1998
     * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
1999
     * 45 chars should be sufficient to reach the end of the encoding
2000
     * declaration without going too far inside the document content.
2001
     * on UTF-16 this means 90bytes, on UCS4 this means 180
2002
     * The actual value depending on guessed encoding is passed as @len
2003
     * if provided
2004
     */
2005
0
    if (len >= 0) {
2006
0
        if (toconv > len)
2007
0
            toconv = len;
2008
0
    } else {
2009
0
        if (toconv > 180)
2010
0
            toconv = 180;
2011
0
    }
2012
0
    if (toconv * 2 >= written) {
2013
0
        xmlBufferGrow(out, toconv * 2);
2014
0
  written = out->size - out->use - 1;
2015
0
    }
2016
2017
0
    ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2018
0
                           in->content, &toconv);
2019
0
    xmlBufferShrink(in, toconv);
2020
0
    out->use += written;
2021
0
    out->content[out->use] = 0;
2022
0
    if (ret == -1) ret = -3;
2023
2024
#ifdef DEBUG_ENCODING
2025
    switch (ret) {
2026
        case 0:
2027
      xmlGenericError(xmlGenericErrorContext,
2028
        "converted %d bytes to %d bytes of input\n",
2029
              toconv, written);
2030
      break;
2031
        case -1:
2032
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2033
              toconv, written, in->use);
2034
      break;
2035
        case -2:
2036
      xmlGenericError(xmlGenericErrorContext,
2037
        "input conversion failed due to input error\n");
2038
      break;
2039
        case -3:
2040
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2041
              toconv, written, in->use);
2042
      break;
2043
  default:
2044
      xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2045
    }
2046
#endif /* DEBUG_ENCODING */
2047
    /*
2048
     * Ignore when input buffer is not on a boundary
2049
     */
2050
0
    if (ret == -3) ret = 0;
2051
0
    if (ret == -1) ret = 0;
2052
0
    return(ret);
2053
0
}
2054
2055
/**
2056
 * xmlCharEncFirstLine:
2057
 * @handler:  char enconding transformation data structure
2058
 * @out:  an xmlBuffer for the output.
2059
 * @in:  an xmlBuffer for the input
2060
 *
2061
 * Front-end for the encoding handler input function, but handle only
2062
 * the very first line, i.e. limit itself to 45 chars.
2063
 *
2064
 * Returns the number of byte written if success, or
2065
 *     -1 general error
2066
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2067
 *        the result of transformation can't fit into the encoding we want), or
2068
 */
2069
int
2070
xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2071
0
                 xmlBufferPtr in) {
2072
0
    return(xmlCharEncFirstLineInt(handler, out, in, -1));
2073
0
}
2074
2075
/**
2076
 * xmlCharEncFirstLineInput:
2077
 * @input: a parser input buffer
2078
 * @len:  number of bytes to convert for the first line, or -1
2079
 *
2080
 * Front-end for the encoding handler input function, but handle only
2081
 * the very first line. Point is that this is based on autodetection
2082
 * of the encoding and once that first line is converted we may find
2083
 * out that a different decoder is needed to process the input.
2084
 *
2085
 * Returns the number of byte written if success, or
2086
 *     -1 general error
2087
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2088
 *        the result of transformation can't fit into the encoding we want), or
2089
 */
2090
int
2091
xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
2092
28.0k
{
2093
28.0k
    int ret;
2094
28.0k
    size_t written;
2095
28.0k
    size_t toconv;
2096
28.0k
    int c_in;
2097
28.0k
    int c_out;
2098
28.0k
    xmlBufPtr in;
2099
28.0k
    xmlBufPtr out;
2100
2101
28.0k
    if ((input == NULL) || (input->encoder == NULL) ||
2102
28.0k
        (input->buffer == NULL) || (input->raw == NULL))
2103
0
        return (-1);
2104
28.0k
    out = input->buffer;
2105
28.0k
    in = input->raw;
2106
2107
28.0k
    toconv = xmlBufUse(in);
2108
28.0k
    if (toconv == 0)
2109
50
        return (0);
2110
28.0k
    written = xmlBufAvail(out) - 1; /* count '\0' */
2111
    /*
2112
     * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2113
     * 45 chars should be sufficient to reach the end of the encoding
2114
     * declaration without going too far inside the document content.
2115
     * on UTF-16 this means 90bytes, on UCS4 this means 180
2116
     * The actual value depending on guessed encoding is passed as @len
2117
     * if provided
2118
     */
2119
28.0k
    if (len >= 0) {
2120
0
        if (toconv > (unsigned int) len)
2121
0
            toconv = len;
2122
28.0k
    } else {
2123
28.0k
        if (toconv > 180)
2124
21.6k
            toconv = 180;
2125
28.0k
    }
2126
28.0k
    if (toconv * 2 >= written) {
2127
0
        xmlBufGrow(out, toconv * 2);
2128
0
        written = xmlBufAvail(out) - 1;
2129
0
    }
2130
28.0k
    if (written > 360)
2131
28.0k
        written = 360;
2132
2133
28.0k
    c_in = toconv;
2134
28.0k
    c_out = written;
2135
28.0k
    ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2136
28.0k
                           xmlBufContent(in), &c_in);
2137
28.0k
    xmlBufShrink(in, c_in);
2138
28.0k
    xmlBufAddLen(out, c_out);
2139
28.0k
    if (ret == -1)
2140
12.5k
        ret = -3;
2141
2142
28.0k
    switch (ret) {
2143
13.5k
        case 0:
2144
#ifdef DEBUG_ENCODING
2145
            xmlGenericError(xmlGenericErrorContext,
2146
                            "converted %d bytes to %d bytes of input\n",
2147
                            c_in, c_out);
2148
#endif
2149
13.5k
            break;
2150
0
        case -1:
2151
#ifdef DEBUG_ENCODING
2152
            xmlGenericError(xmlGenericErrorContext,
2153
                         "converted %d bytes to %d bytes of input, %d left\n",
2154
                            c_in, c_out, (int)xmlBufUse(in));
2155
#endif
2156
0
            break;
2157
12.6k
        case -3:
2158
#ifdef DEBUG_ENCODING
2159
            xmlGenericError(xmlGenericErrorContext,
2160
                        "converted %d bytes to %d bytes of input, %d left\n",
2161
                            c_in, c_out, (int)xmlBufUse(in));
2162
#endif
2163
12.6k
            break;
2164
44
        case -2: {
2165
44
            char buf[50];
2166
44
            const xmlChar *content = xmlBufContent(in);
2167
2168
44
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2169
44
         content[0], content[1],
2170
44
         content[2], content[3]);
2171
44
      buf[49] = 0;
2172
44
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2173
44
        "input conversion failed due to input error, bytes %s\n",
2174
44
               buf);
2175
44
        }
2176
28.0k
    }
2177
    /*
2178
     * Ignore when input buffer is not on a boundary
2179
     */
2180
28.0k
    if (ret == -3) ret = 0;
2181
28.0k
    if (ret == -1) ret = 0;
2182
28.0k
    return(ret);
2183
28.0k
}
2184
2185
/**
2186
 * xmlCharEncInput:
2187
 * @input: a parser input buffer
2188
 * @flush: try to flush all the raw buffer
2189
 *
2190
 * Generic front-end for the encoding handler on parser input
2191
 *
2192
 * Returns the number of byte written if success, or
2193
 *     -1 general error
2194
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2195
 *        the result of transformation can't fit into the encoding we want), or
2196
 */
2197
int
2198
xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
2199
7.59M
{
2200
7.59M
    int ret;
2201
7.59M
    size_t written;
2202
7.59M
    size_t toconv;
2203
7.59M
    int c_in;
2204
7.59M
    int c_out;
2205
7.59M
    xmlBufPtr in;
2206
7.59M
    xmlBufPtr out;
2207
2208
7.59M
    if ((input == NULL) || (input->encoder == NULL) ||
2209
7.59M
        (input->buffer == NULL) || (input->raw == NULL))
2210
0
        return (-1);
2211
7.59M
    out = input->buffer;
2212
7.59M
    in = input->raw;
2213
2214
7.59M
    toconv = xmlBufUse(in);
2215
7.59M
    if (toconv == 0)
2216
5.77k
        return (0);
2217
7.59M
    if ((toconv > 64 * 1024) && (flush == 0))
2218
1
        toconv = 64 * 1024;
2219
7.59M
    written = xmlBufAvail(out);
2220
7.59M
    if (written > 0)
2221
7.59M
        written--; /* count '\0' */
2222
7.59M
    if (toconv * 2 >= written) {
2223
88.0k
        xmlBufGrow(out, toconv * 2);
2224
88.0k
        written = xmlBufAvail(out);
2225
88.0k
        if (written > 0)
2226
88.0k
            written--; /* count '\0' */
2227
88.0k
    }
2228
7.59M
    if ((written > 128 * 1024) && (flush == 0))
2229
4
        written = 128 * 1024;
2230
2231
7.59M
    c_in = toconv;
2232
7.59M
    c_out = written;
2233
7.59M
    ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2234
7.59M
                           xmlBufContent(in), &c_in);
2235
7.59M
    xmlBufShrink(in, c_in);
2236
7.59M
    xmlBufAddLen(out, c_out);
2237
7.59M
    if (ret == -1)
2238
143k
        ret = -3;
2239
2240
7.59M
    switch (ret) {
2241
1.99M
        case 0:
2242
#ifdef DEBUG_ENCODING
2243
            xmlGenericError(xmlGenericErrorContext,
2244
                            "converted %d bytes to %d bytes of input\n",
2245
                            c_in, c_out);
2246
#endif
2247
1.99M
            break;
2248
0
        case -1:
2249
#ifdef DEBUG_ENCODING
2250
            xmlGenericError(xmlGenericErrorContext,
2251
                         "converted %d bytes to %d bytes of input, %d left\n",
2252
                            c_in, c_out, (int)xmlBufUse(in));
2253
#endif
2254
0
            break;
2255
145k
        case -3:
2256
#ifdef DEBUG_ENCODING
2257
            xmlGenericError(xmlGenericErrorContext,
2258
                        "converted %d bytes to %d bytes of input, %d left\n",
2259
                            c_in, c_out, (int)xmlBufUse(in));
2260
#endif
2261
145k
            break;
2262
5.77k
        case -2: {
2263
5.77k
            char buf[50];
2264
5.77k
            const xmlChar *content = xmlBufContent(in);
2265
2266
5.77k
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2267
5.77k
         content[0], content[1],
2268
5.77k
         content[2], content[3]);
2269
5.77k
      buf[49] = 0;
2270
5.77k
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2271
5.77k
        "input conversion failed due to input error, bytes %s\n",
2272
5.77k
               buf);
2273
5.77k
        }
2274
7.59M
    }
2275
    /*
2276
     * Ignore when input buffer is not on a boundary
2277
     */
2278
7.59M
    if (ret == -3)
2279
145k
        ret = 0;
2280
7.59M
    return (c_out? c_out : ret);
2281
7.59M
}
2282
2283
/**
2284
 * xmlCharEncInFunc:
2285
 * @handler:  char encoding transformation data structure
2286
 * @out:  an xmlBuffer for the output.
2287
 * @in:  an xmlBuffer for the input
2288
 *
2289
 * Generic front-end for the encoding handler input function
2290
 *
2291
 * Returns the number of byte written if success, or
2292
 *     -1 general error
2293
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2294
 *        the result of transformation can't fit into the encoding we want), or
2295
 */
2296
int
2297
xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2298
                 xmlBufferPtr in)
2299
0
{
2300
0
    int ret;
2301
0
    int written;
2302
0
    int toconv;
2303
2304
0
    if (handler == NULL)
2305
0
        return (-1);
2306
0
    if (out == NULL)
2307
0
        return (-1);
2308
0
    if (in == NULL)
2309
0
        return (-1);
2310
2311
0
    toconv = in->use;
2312
0
    if (toconv == 0)
2313
0
        return (0);
2314
0
    written = out->size - out->use -1; /* count '\0' */
2315
0
    if (toconv * 2 >= written) {
2316
0
        xmlBufferGrow(out, out->size + toconv * 2);
2317
0
        written = out->size - out->use - 1;
2318
0
    }
2319
0
    ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2320
0
                           in->content, &toconv);
2321
0
    xmlBufferShrink(in, toconv);
2322
0
    out->use += written;
2323
0
    out->content[out->use] = 0;
2324
0
    if (ret == -1)
2325
0
        ret = -3;
2326
2327
0
    switch (ret) {
2328
0
        case 0:
2329
#ifdef DEBUG_ENCODING
2330
            xmlGenericError(xmlGenericErrorContext,
2331
                            "converted %d bytes to %d bytes of input\n",
2332
                            toconv, written);
2333
#endif
2334
0
            break;
2335
0
        case -1:
2336
#ifdef DEBUG_ENCODING
2337
            xmlGenericError(xmlGenericErrorContext,
2338
                         "converted %d bytes to %d bytes of input, %d left\n",
2339
                            toconv, written, in->use);
2340
#endif
2341
0
            break;
2342
0
        case -3:
2343
#ifdef DEBUG_ENCODING
2344
            xmlGenericError(xmlGenericErrorContext,
2345
                        "converted %d bytes to %d bytes of input, %d left\n",
2346
                            toconv, written, in->use);
2347
#endif
2348
0
            break;
2349
0
        case -2: {
2350
0
            char buf[50];
2351
2352
0
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2353
0
         in->content[0], in->content[1],
2354
0
         in->content[2], in->content[3]);
2355
0
      buf[49] = 0;
2356
0
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2357
0
        "input conversion failed due to input error, bytes %s\n",
2358
0
               buf);
2359
0
        }
2360
0
    }
2361
    /*
2362
     * Ignore when input buffer is not on a boundary
2363
     */
2364
0
    if (ret == -3)
2365
0
        ret = 0;
2366
0
    return (written? written : ret);
2367
0
}
2368
2369
#ifdef LIBXML_OUTPUT_ENABLED
2370
/**
2371
 * xmlCharEncOutput:
2372
 * @output: a parser output buffer
2373
 * @init: is this an initialization call without data
2374
 *
2375
 * Generic front-end for the encoding handler on parser output
2376
 * a first call with @init == 1 has to be made first to initiate the
2377
 * output in case of non-stateless encoding needing to initiate their
2378
 * state or the output (like the BOM in UTF16).
2379
 * In case of UTF8 sequence conversion errors for the given encoder,
2380
 * the content will be automatically remapped to a CharRef sequence.
2381
 *
2382
 * Returns the number of byte written if success, or
2383
 *     -1 general error
2384
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2385
 *        the result of transformation can't fit into the encoding we want), or
2386
 */
2387
int
2388
xmlCharEncOutput(xmlOutputBufferPtr output, int init)
2389
0
{
2390
0
    int ret;
2391
0
    size_t written;
2392
0
    size_t writtentot = 0;
2393
0
    size_t toconv;
2394
0
    int c_in;
2395
0
    int c_out;
2396
0
    xmlBufPtr in;
2397
0
    xmlBufPtr out;
2398
2399
0
    if ((output == NULL) || (output->encoder == NULL) ||
2400
0
        (output->buffer == NULL) || (output->conv == NULL))
2401
0
        return (-1);
2402
0
    out = output->conv;
2403
0
    in = output->buffer;
2404
2405
0
retry:
2406
2407
0
    written = xmlBufAvail(out);
2408
0
    if (written > 0)
2409
0
        written--; /* count '\0' */
2410
2411
    /*
2412
     * First specific handling of the initialization call
2413
     */
2414
0
    if (init) {
2415
0
        c_in = 0;
2416
0
        c_out = written;
2417
        /* TODO: Check return value. */
2418
0
        xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2419
0
                          NULL, &c_in);
2420
0
        xmlBufAddLen(out, c_out);
2421
#ifdef DEBUG_ENCODING
2422
  xmlGenericError(xmlGenericErrorContext,
2423
    "initialized encoder\n");
2424
#endif
2425
0
        return(0);
2426
0
    }
2427
2428
    /*
2429
     * Conversion itself.
2430
     */
2431
0
    toconv = xmlBufUse(in);
2432
0
    if (toconv == 0)
2433
0
        return (0);
2434
0
    if (toconv > 64 * 1024)
2435
0
        toconv = 64 * 1024;
2436
0
    if (toconv * 4 >= written) {
2437
0
        xmlBufGrow(out, toconv * 4);
2438
0
        written = xmlBufAvail(out) - 1;
2439
0
    }
2440
0
    if (written > 256 * 1024)
2441
0
        written = 256 * 1024;
2442
2443
0
    c_in = toconv;
2444
0
    c_out = written;
2445
0
    ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2446
0
                            xmlBufContent(in), &c_in);
2447
0
    xmlBufShrink(in, c_in);
2448
0
    xmlBufAddLen(out, c_out);
2449
0
    writtentot += c_out;
2450
0
    if (ret == -1) {
2451
0
        if (c_out > 0) {
2452
            /* Can be a limitation of iconv or uconv */
2453
0
            goto retry;
2454
0
        }
2455
0
        ret = -3;
2456
0
    }
2457
2458
0
    if (ret >= 0) output += ret;
2459
2460
    /*
2461
     * Attempt to handle error cases
2462
     */
2463
0
    switch (ret) {
2464
0
        case 0:
2465
#ifdef DEBUG_ENCODING
2466
      xmlGenericError(xmlGenericErrorContext,
2467
        "converted %d bytes to %d bytes of output\n",
2468
              c_in, c_out);
2469
#endif
2470
0
      break;
2471
0
        case -1:
2472
#ifdef DEBUG_ENCODING
2473
      xmlGenericError(xmlGenericErrorContext,
2474
        "output conversion failed by lack of space\n");
2475
#endif
2476
0
      break;
2477
0
        case -3:
2478
#ifdef DEBUG_ENCODING
2479
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2480
              c_in, c_out, (int) xmlBufUse(in));
2481
#endif
2482
0
      break;
2483
0
        case -4:
2484
0
            xmlEncodingErr(XML_I18N_NO_OUTPUT,
2485
0
                           "xmlCharEncOutFunc: no output function !\n", NULL);
2486
0
            ret = -1;
2487
0
            break;
2488
0
        case -2: {
2489
0
      xmlChar charref[20];
2490
0
      int len = (int) xmlBufUse(in);
2491
0
            xmlChar *content = xmlBufContent(in);
2492
0
      int cur, charrefLen;
2493
2494
0
      cur = xmlGetUTF8Char(content, &len);
2495
0
      if (cur <= 0)
2496
0
                break;
2497
2498
#ifdef DEBUG_ENCODING
2499
            xmlGenericError(xmlGenericErrorContext,
2500
                    "handling output conversion error\n");
2501
            xmlGenericError(xmlGenericErrorContext,
2502
                    "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2503
                    content[0], content[1],
2504
                    content[2], content[3]);
2505
#endif
2506
            /*
2507
             * Removes the UTF8 sequence, and replace it by a charref
2508
             * and continue the transcoding phase, hoping the error
2509
             * did not mangle the encoder state.
2510
             */
2511
0
            charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2512
0
                             "&#%d;", cur);
2513
0
            xmlBufShrink(in, len);
2514
0
            xmlBufGrow(out, charrefLen * 4);
2515
0
            c_out = xmlBufAvail(out) - 1;
2516
0
            c_in = charrefLen;
2517
0
            ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2518
0
                                    charref, &c_in);
2519
2520
0
      if ((ret < 0) || (c_in != charrefLen)) {
2521
0
    char buf[50];
2522
2523
0
    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2524
0
       content[0], content[1],
2525
0
       content[2], content[3]);
2526
0
    buf[49] = 0;
2527
0
    xmlEncodingErr(XML_I18N_CONV_FAILED,
2528
0
        "output conversion failed due to conv error, bytes %s\n",
2529
0
             buf);
2530
0
    if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE)
2531
0
        content[0] = ' ';
2532
0
                break;
2533
0
      }
2534
2535
0
            xmlBufAddLen(out, c_out);
2536
0
            writtentot += c_out;
2537
0
            goto retry;
2538
0
  }
2539
0
    }
2540
0
    return(ret);
2541
0
}
2542
#endif
2543
2544
/**
2545
 * xmlCharEncOutFunc:
2546
 * @handler:  char enconding transformation data structure
2547
 * @out:  an xmlBuffer for the output.
2548
 * @in:  an xmlBuffer for the input
2549
 *
2550
 * Generic front-end for the encoding handler output function
2551
 * a first call with @in == NULL has to be made firs to initiate the
2552
 * output in case of non-stateless encoding needing to initiate their
2553
 * state or the output (like the BOM in UTF16).
2554
 * In case of UTF8 sequence conversion errors for the given encoder,
2555
 * the content will be automatically remapped to a CharRef sequence.
2556
 *
2557
 * Returns the number of byte written if success, or
2558
 *     -1 general error
2559
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2560
 *        the result of transformation can't fit into the encoding we want), or
2561
 */
2562
int
2563
xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2564
0
                  xmlBufferPtr in) {
2565
0
    int ret;
2566
0
    int written;
2567
0
    int writtentot = 0;
2568
0
    int toconv;
2569
0
    int output = 0;
2570
2571
0
    if (handler == NULL) return(-1);
2572
0
    if (out == NULL) return(-1);
2573
2574
0
retry:
2575
2576
0
    written = out->size - out->use;
2577
2578
0
    if (written > 0)
2579
0
  written--; /* Gennady: count '/0' */
2580
2581
    /*
2582
     * First specific handling of in = NULL, i.e. the initialization call
2583
     */
2584
0
    if (in == NULL) {
2585
0
        toconv = 0;
2586
        /* TODO: Check return value. */
2587
0
        xmlEncOutputChunk(handler, &out->content[out->use], &written,
2588
0
                          NULL, &toconv);
2589
0
        out->use += written;
2590
0
        out->content[out->use] = 0;
2591
#ifdef DEBUG_ENCODING
2592
  xmlGenericError(xmlGenericErrorContext,
2593
    "initialized encoder\n");
2594
#endif
2595
0
        return(0);
2596
0
    }
2597
2598
    /*
2599
     * Conversion itself.
2600
     */
2601
0
    toconv = in->use;
2602
0
    if (toconv == 0)
2603
0
  return(0);
2604
0
    if (toconv * 4 >= written) {
2605
0
        xmlBufferGrow(out, toconv * 4);
2606
0
  written = out->size - out->use - 1;
2607
0
    }
2608
0
    ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2609
0
                            in->content, &toconv);
2610
0
    xmlBufferShrink(in, toconv);
2611
0
    out->use += written;
2612
0
    writtentot += written;
2613
0
    out->content[out->use] = 0;
2614
0
    if (ret == -1) {
2615
0
        if (written > 0) {
2616
            /* Can be a limitation of iconv or uconv */
2617
0
            goto retry;
2618
0
        }
2619
0
        ret = -3;
2620
0
    }
2621
2622
0
    if (ret >= 0) output += ret;
2623
2624
    /*
2625
     * Attempt to handle error cases
2626
     */
2627
0
    switch (ret) {
2628
0
        case 0:
2629
#ifdef DEBUG_ENCODING
2630
      xmlGenericError(xmlGenericErrorContext,
2631
        "converted %d bytes to %d bytes of output\n",
2632
              toconv, written);
2633
#endif
2634
0
      break;
2635
0
        case -1:
2636
#ifdef DEBUG_ENCODING
2637
      xmlGenericError(xmlGenericErrorContext,
2638
        "output conversion failed by lack of space\n");
2639
#endif
2640
0
      break;
2641
0
        case -3:
2642
#ifdef DEBUG_ENCODING
2643
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2644
              toconv, written, in->use);
2645
#endif
2646
0
      break;
2647
0
        case -4:
2648
0
      xmlEncodingErr(XML_I18N_NO_OUTPUT,
2649
0
               "xmlCharEncOutFunc: no output function !\n", NULL);
2650
0
      ret = -1;
2651
0
            break;
2652
0
        case -2: {
2653
0
      xmlChar charref[20];
2654
0
      int len = in->use;
2655
0
      const xmlChar *utf = (const xmlChar *) in->content;
2656
0
      int cur, charrefLen;
2657
2658
0
      cur = xmlGetUTF8Char(utf, &len);
2659
0
      if (cur <= 0)
2660
0
                break;
2661
2662
#ifdef DEBUG_ENCODING
2663
            xmlGenericError(xmlGenericErrorContext,
2664
                    "handling output conversion error\n");
2665
            xmlGenericError(xmlGenericErrorContext,
2666
                    "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2667
                    in->content[0], in->content[1],
2668
                    in->content[2], in->content[3]);
2669
#endif
2670
            /*
2671
             * Removes the UTF8 sequence, and replace it by a charref
2672
             * and continue the transcoding phase, hoping the error
2673
             * did not mangle the encoder state.
2674
             */
2675
0
            charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2676
0
                             "&#%d;", cur);
2677
0
            xmlBufferShrink(in, len);
2678
0
            xmlBufferGrow(out, charrefLen * 4);
2679
0
      written = out->size - out->use - 1;
2680
0
            toconv = charrefLen;
2681
0
            ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2682
0
                                    charref, &toconv);
2683
2684
0
      if ((ret < 0) || (toconv != charrefLen)) {
2685
0
    char buf[50];
2686
2687
0
    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2688
0
       in->content[0], in->content[1],
2689
0
       in->content[2], in->content[3]);
2690
0
    buf[49] = 0;
2691
0
    xmlEncodingErr(XML_I18N_CONV_FAILED,
2692
0
        "output conversion failed due to conv error, bytes %s\n",
2693
0
             buf);
2694
0
    if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2695
0
        in->content[0] = ' ';
2696
0
          break;
2697
0
      }
2698
2699
0
            out->use += written;
2700
0
            writtentot += written;
2701
0
            out->content[out->use] = 0;
2702
0
            goto retry;
2703
0
  }
2704
0
    }
2705
0
    return(ret);
2706
0
}
2707
2708
/**
2709
 * xmlCharEncCloseFunc:
2710
 * @handler:  char enconding transformation data structure
2711
 *
2712
 * Generic front-end for encoding handler close function
2713
 *
2714
 * Returns 0 if success, or -1 in case of error
2715
 */
2716
int
2717
32.6k
xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2718
32.6k
    int ret = 0;
2719
32.6k
    int tofree = 0;
2720
32.6k
    int i, handler_in_list = 0;
2721
2722
32.6k
    if (handler == NULL) return(-1);
2723
32.6k
    if (handler->name == NULL) return(-1);
2724
32.6k
    if (handlers != NULL) {
2725
258k
        for (i = 0;i < nbCharEncodingHandler; i++) {
2726
232k
            if (handler == handlers[i]) {
2727
6.62k
          handler_in_list = 1;
2728
6.62k
    break;
2729
6.62k
      }
2730
232k
  }
2731
32.6k
    }
2732
32.6k
#ifdef LIBXML_ICONV_ENABLED
2733
    /*
2734
     * Iconv handlers can be used only once, free the whole block.
2735
     * and the associated icon resources.
2736
     */
2737
32.6k
    if ((handler_in_list == 0) &&
2738
26.0k
        ((handler->iconv_out != NULL) || (handler->iconv_in != NULL))) {
2739
26.0k
        tofree = 1;
2740
26.0k
  if (handler->iconv_out != NULL) {
2741
26.0k
      if (iconv_close(handler->iconv_out))
2742
0
    ret = -1;
2743
26.0k
      handler->iconv_out = NULL;
2744
26.0k
  }
2745
26.0k
  if (handler->iconv_in != NULL) {
2746
26.0k
      if (iconv_close(handler->iconv_in))
2747
0
    ret = -1;
2748
26.0k
      handler->iconv_in = NULL;
2749
26.0k
  }
2750
26.0k
    }
2751
32.6k
#endif /* LIBXML_ICONV_ENABLED */
2752
#ifdef LIBXML_ICU_ENABLED
2753
    if ((handler_in_list == 0) &&
2754
        ((handler->uconv_out != NULL) || (handler->uconv_in != NULL))) {
2755
        tofree = 1;
2756
  if (handler->uconv_out != NULL) {
2757
      closeIcuConverter(handler->uconv_out);
2758
      handler->uconv_out = NULL;
2759
  }
2760
  if (handler->uconv_in != NULL) {
2761
      closeIcuConverter(handler->uconv_in);
2762
      handler->uconv_in = NULL;
2763
  }
2764
    }
2765
#endif
2766
32.6k
    if (tofree) {
2767
        /* free up only dynamic handlers iconv/uconv */
2768
26.0k
        if (handler->name != NULL)
2769
26.0k
            xmlFree(handler->name);
2770
26.0k
        handler->name = NULL;
2771
26.0k
        xmlFree(handler);
2772
26.0k
    }
2773
#ifdef DEBUG_ENCODING
2774
    if (ret)
2775
        xmlGenericError(xmlGenericErrorContext,
2776
    "failed to close the encoding handler\n");
2777
    else
2778
        xmlGenericError(xmlGenericErrorContext,
2779
    "closed the encoding handler\n");
2780
#endif
2781
2782
32.6k
    return(ret);
2783
32.6k
}
2784
2785
/**
2786
 * xmlByteConsumed:
2787
 * @ctxt: an XML parser context
2788
 *
2789
 * This function provides the current index of the parser relative
2790
 * to the start of the current entity. This function is computed in
2791
 * bytes from the beginning starting at zero and finishing at the
2792
 * size in byte of the file if parsing a file. The function is
2793
 * of constant cost if the input is UTF-8 but can be costly if run
2794
 * on non-UTF-8 input.
2795
 *
2796
 * Returns the index in bytes from the beginning of the entity or -1
2797
 *         in case the index could not be computed.
2798
 */
2799
long
2800
0
xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2801
0
    xmlParserInputPtr in;
2802
2803
0
    if (ctxt == NULL) return(-1);
2804
0
    in = ctxt->input;
2805
0
    if (in == NULL)  return(-1);
2806
0
    if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2807
0
        unsigned int unused = 0;
2808
0
  xmlCharEncodingHandler * handler = in->buf->encoder;
2809
        /*
2810
   * Encoding conversion, compute the number of unused original
2811
   * bytes from the input not consumed and substract that from
2812
   * the raw consumed value, this is not a cheap operation
2813
   */
2814
0
        if (in->end - in->cur > 0) {
2815
0
      unsigned char convbuf[32000];
2816
0
      const unsigned char *cur = (const unsigned char *)in->cur;
2817
0
      int toconv = in->end - in->cur, written = 32000;
2818
2819
0
      int ret;
2820
2821
0
            do {
2822
0
                toconv = in->end - cur;
2823
0
                written = 32000;
2824
0
                ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2825
0
                                        cur, &toconv);
2826
0
                if (ret < 0) {
2827
0
                    if (written > 0)
2828
0
                        ret = -2;
2829
0
                    else
2830
0
                        return(-1);
2831
0
                }
2832
0
                unused += written;
2833
0
                cur += toconv;
2834
0
            } while (ret == -2);
2835
0
  }
2836
0
  if (in->buf->rawconsumed < unused)
2837
0
      return(-1);
2838
0
  return(in->buf->rawconsumed - unused);
2839
0
    }
2840
0
    return(in->consumed + (in->cur - in->base));
2841
0
}
2842
2843
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2844
#ifdef LIBXML_ISO8859X_ENABLED
2845
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. At most @outlen bytes are stored in @out; when
 * @out is full the conversion stops before the first character which
 * does not fit and the caller can resume from there.
 *
 * Returns the number of bytes written if success (0 for the
 *     initialization call with @in == NULL), or
 *     -1 if a mandatory argument is NULL
 *     -2 if the transcoding fails (invalid UTF-8 sequence or character
 *        not available in the target character set)
 *     -3 if the input ends in the middle of a multi-byte sequence
 * The value of @inlen after return is the number of octets consumed,
 *     counting complete and successfully converted characters only.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;
    int ret = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
        * initialization nothing to do
        */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    while (in < inend) {
        unsigned char d = *in++;

        if (d < 0x80) {
            /* ASCII is copied unchanged */
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            break;
        } else if (d < 0xE0) {
            unsigned char c;

            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                ret = -3;
                break;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                ret = -2;
                break;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            /* first level selects a 64 entries block, second the byte */
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;

            if (inend - in < 2) {
                /* trailing bytes not in input buffer */
                ret = -3;
                break;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                ret = -2;
                break;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                ret = -2;
                break;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                        xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
        } else {
            /* cannot transcode >= U+010000 */
            ret = -2;
            break;
        }

        /*
         * Never write past the space announced by the caller: stop
         * here and leave the current character for the next call.
         */
        if (out - outstart >= *outlen)
            break;
        *out++ = d;
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    if (ret < 0)
        return(ret);
    return(*outlen);
}
2964
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable: for each byte value from 0x80 to 0xFF the matching
 *                Unicode code point, 0 marks an undefined byte
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. The conversion stops early when @out has no room
 * left for the next character.
 *
 * Returns the number of bytes written if success, or -1 if an argument
 *     is NULL or if an input byte has no mapping in @unicodetable
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* instop;
    unsigned int c;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    instop = inend;

    /*
     * Main loop, it needs room for a 3 bytes sequence. The free space
     * is computed as a difference so that no pointer located before
     * the start of @out is ever formed when @outlen is below 2.
     */
    while ((in < inend) && (outend - out > 2)) {
        if (*in >= 0x80) {
            c = unicodetable [*in - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
            ++in;
        }
        /* An ASCII run cannot be longer than the space left in @out */
        if (instop - in > outend - out) instop = in + (outend - out);
        /* check the bound first, *in must not be read at the end of @in */
        while ((in < instop) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }
    /* Up to two ASCII chars still fit in the last bytes of @out */
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ =  *in++;
    }
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ =  *in++;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (*outlen);
}
3030
3031
3032
/************************************************************************
3033
 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
3034
 ************************************************************************/
3035
3036
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
3037
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3038
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3039
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3040
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3041
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
3042
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
3043
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
3044
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
3045
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
3046
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
3047
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
3048
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
3049
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
3050
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
3051
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
3052
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
3053
};
3054
3055
static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
3056
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3057
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3058
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3059
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3060
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3061
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3062
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3063
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3064
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3065
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3066
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3067
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
3068
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
3069
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3070
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
3071
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3072
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
3073
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3074
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3075
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
3076
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
3077
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
3078
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
3079
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3080
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
3081
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
3082
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
3083
};
3084
3085
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
3086
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3087
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3088
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3089
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3090
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
3091
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
3092
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
3093
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
3094
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
3095
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3096
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
3097
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
3098
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
3099
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3100
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
3101
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
3102
};
3103
3104
static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
3105
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3106
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3107
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3108
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3109
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3110
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3111
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3112
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3113
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3114
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3115
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
3116
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
3117
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
3118
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
3119
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3120
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3121
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
3122
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3123
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3124
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3125
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3126
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3127
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3128
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3129
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
3130
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
3131
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
3132
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3133
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3134
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3135
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
3136
};
3137
3138
static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
3139
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3140
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3141
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3142
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3143
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
3144
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
3145
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
3146
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
3147
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3148
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
3149
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3150
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
3151
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3152
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
3153
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3154
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
3155
};
3156
3157
static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
3158
    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
3159
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3160
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3161
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3162
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3163
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3164
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3165
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3166
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3167
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
3168
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3169
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3170
    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3171
    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
3172
    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
3173
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
3174
    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
3175
    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
3176
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
3177
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3178
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
3179
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3180
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3181
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3182
    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
3183
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
3184
    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
3185
};
3186
3187
static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
3188
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3189
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3190
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3191
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3192
    0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
3193
    0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
3194
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
3195
    0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
3196
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
3197
    0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
3198
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
3199
    0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
3200
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
3201
    0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
3202
    0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
3203
    0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
3204
};
3205
3206
static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
3207
    "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3208
    "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3209
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3210
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3211
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3212
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3213
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3214
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3215
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3216
    "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
3217
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3218
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
3219
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3220
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3221
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3222
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3223
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
3224
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3225
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3226
    "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3227
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3228
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3229
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3230
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3231
    "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3232
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3233
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3234
};
3235
3236
static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
3237
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3238
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3239
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3240
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3241
    0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
3242
    0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
3243
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3244
    0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
3245
    0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
3246
    0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3247
    0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3248
    0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3249
    0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3250
    0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3251
    0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3252
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3253
};
3254
3255
static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
3256
    "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3257
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3258
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3259
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3260
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3261
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3262
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3263
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3264
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3265
    "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3266
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3267
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3268
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3269
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3270
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3271
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3272
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3273
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3274
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3275
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3276
    "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3277
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3278
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3279
};
3280
3281
static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
3282
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3283
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3284
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3285
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3286
    0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3287
    0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3288
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3289
    0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3290
    0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3291
    0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3292
    0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3293
    0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3294
    0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3295
    0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3296
    0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3297
    0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3298
};
3299
3300
static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3301
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3302
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3303
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3304
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3305
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3306
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3307
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3308
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3309
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3310
    "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3311
    "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3312
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3313
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3314
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3315
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3316
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3317
    "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3318
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3319
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3320
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3321
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3322
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3323
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3324
    "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3325
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3326
    "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3327
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3328
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3329
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3330
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3331
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3332
};
3333
3334
static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
3335
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3336
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3337
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3338
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3339
    0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3340
    0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3341
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3342
    0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3343
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3344
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3345
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3346
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3347
    0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3348
    0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3349
    0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3350
    0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3351
};
3352
3353
static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3354
    "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3355
    "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3356
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3357
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3358
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3359
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3360
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3361
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3362
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3363
    "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3364
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3365
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3366
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3367
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3368
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3369
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3370
    "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3371
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3372
    "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3373
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3374
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3375
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3376
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3377
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3378
    "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3379
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3380
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3381
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3382
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3383
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3384
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3385
};
3386
3387
static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
3388
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3389
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3390
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3391
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3392
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3393
    0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3394
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3395
    0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3396
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3397
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3398
    0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3399
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3400
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3401
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3402
    0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3403
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3404
};
3405
3406
static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3407
    "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3408
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3409
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3410
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3411
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3412
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3413
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3414
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3415
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3416
    "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3417
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3418
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3419
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3420
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3421
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3422
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3423
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3424
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3425
    "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3426
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3427
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3428
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3429
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3430
};
3431
3432
static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3433
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3434
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3435
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3436
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3437
    0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3438
    0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3439
    0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3440
    0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3441
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3442
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3443
    0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3444
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3445
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3446
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3447
    0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3448
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3449
};
3450
3451
static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3452
    "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3453
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3454
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3455
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3456
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3457
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3458
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3459
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3460
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3461
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3462
    "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3463
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3464
    "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3465
    "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3466
    "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3467
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3468
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3469
    "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3470
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3471
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3472
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3473
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3474
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3475
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3476
    "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3477
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3478
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3479
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3480
    "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3481
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3482
    "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3483
};
3484
3485
static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3486
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3487
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3488
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3489
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3490
    0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3491
    0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3492
    0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3493
    0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3494
    0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3495
    0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3496
    0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3497
    0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3498
    0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3499
    0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3500
    0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3501
    0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3502
};
3503
3504
static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3505
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3506
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3507
    "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3508
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3509
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3510
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3511
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3512
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3513
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3514
    "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3515
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3516
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3517
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3518
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3519
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3520
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3521
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3522
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3523
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3524
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3525
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3526
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3527
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3528
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3529
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3530
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3531
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3532
};
3533
3534
static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3535
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3536
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3537
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3538
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3539
    0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3540
    0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3541
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3542
    0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3543
    0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3544
    0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3545
    0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3546
    0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3547
    0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3548
    0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3549
    0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3550
    0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3551
};
3552
3553
static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3554
    "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3555
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3556
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3557
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3558
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3559
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3560
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3561
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3562
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3563
    "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3564
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3565
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3566
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3567
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3568
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3569
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3570
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3571
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3572
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3573
    "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3574
    "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3575
    "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3576
    "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3577
    "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3578
    "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3579
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3580
    "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3581
    "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3582
    "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3583
    "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3584
    "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3585
};
3586
3587
static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
3588
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3589
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3590
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3591
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3592
    0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3593
    0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3594
    0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3595
    0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3596
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3597
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3598
    0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3599
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3600
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3601
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3602
    0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3603
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
3604
};
3605
3606
static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
3607
    "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3608
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3609
    "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3610
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3611
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3612
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3613
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3614
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3615
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3616
    "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3617
    "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3618
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3619
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3620
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3621
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3622
    "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3623
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3624
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3625
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3626
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3627
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3628
    "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3629
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3630
    "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3631
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3632
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3633
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3634
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3635
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3636
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3637
    "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3638
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3639
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3640
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3641
    "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3642
    "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3643
    "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3644
    "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3645
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3646
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3647
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3648
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3649
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3650
};
3651
3652
static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
3653
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3654
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3655
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3656
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3657
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3658
    0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3659
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3660
    0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3661
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3662
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3663
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3664
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3665
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3666
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3667
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3668
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
3669
};
3670
3671
static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
3672
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3673
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3674
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3675
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3676
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3677
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3678
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3679
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3680
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3681
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3682
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3683
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3684
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3685
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3686
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3687
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3688
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3689
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3690
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3691
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3692
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3693
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3694
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3695
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3696
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3697
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3698
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
3699
};
3700
3701
static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
3702
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3703
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3704
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3705
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3706
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3707
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3708
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3709
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3710
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3711
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3712
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3713
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3714
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3715
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3716
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3717
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
3718
};
3719
3720
/*
 * xmltranscodetable_ISO8859_16:
 *
 * Byte table of 48 + 9 * 64 = 624 entries handed to UTF8ToISO8859x by
 * UTF8ToISO8859_16.  It is initialised from 39 adjacent string literals
 * of 16 escaped bytes each; the literals concatenate to exactly 624
 * bytes, so the declared size leaves no room for (and the array does
 * not store) a terminating NUL.
 *
 * NOTE(review): the size expression and the contents suggest a 48-byte
 * index area (its values here range from 0x00 to 0x08) followed by nine
 * 64-byte blocks selected by those values -- the lookup code lives in
 * UTF8ToISO8859x, outside this chunk; confirm the layout there before
 * editing any byte.
 */
static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
3721
    /* byte offset 0: first 48 bytes */
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3722
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3723
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3724
    /* byte offset 48 (= 48 + 0 * 64) */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3725
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3726
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3727
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3728
    /* byte offset 112 (= 48 + 1 * 64) */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3729
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3730
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3731
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3732
    /* byte offset 176 (= 48 + 2 * 64) */
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3733
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3734
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3735
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3736
    /* byte offset 240 (= 48 + 3 * 64) */
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3737
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3738
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3739
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3740
    /* byte offset 304 (= 48 + 4 * 64) */
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3741
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3742
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3743
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3744
    /* byte offset 368 (= 48 + 5 * 64) */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3745
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3746
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3747
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3748
    /* byte offset 432 (= 48 + 6 * 64) */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3749
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3750
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3751
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3752
    /* byte offset 496 (= 48 + 7 * 64) */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3753
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3754
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3755
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3756
    /* byte offset 560 (= 48 + 8 * 64) */
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3757
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3758
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3759
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3760
};
3761
3762
3763
/*
3764
 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3765
 */
3766
3767
static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3768
    const unsigned char* in, int *inlen) {
3769
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3770
}
3771
static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3772
    const unsigned char* in, int *inlen) {
3773
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3774
}
3775
3776
static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3777
    const unsigned char* in, int *inlen) {
3778
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3779
}
3780
static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3781
    const unsigned char* in, int *inlen) {
3782
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3783
}
3784
3785
static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3786
    const unsigned char* in, int *inlen) {
3787
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3788
}
3789
static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3790
    const unsigned char* in, int *inlen) {
3791
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3792
}
3793
3794
static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3795
    const unsigned char* in, int *inlen) {
3796
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3797
}
3798
static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3799
    const unsigned char* in, int *inlen) {
3800
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3801
}
3802
3803
static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3804
    const unsigned char* in, int *inlen) {
3805
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3806
}
3807
static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3808
    const unsigned char* in, int *inlen) {
3809
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3810
}
3811
3812
static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3813
    const unsigned char* in, int *inlen) {
3814
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3815
}
3816
static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3817
    const unsigned char* in, int *inlen) {
3818
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3819
}
3820
3821
static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3822
    const unsigned char* in, int *inlen) {
3823
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3824
}
3825
static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3826
    const unsigned char* in, int *inlen) {
3827
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3828
}
3829
3830
static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3831
    const unsigned char* in, int *inlen) {
3832
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3833
}
3834
static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3835
    const unsigned char* in, int *inlen) {
3836
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3837
}
3838
3839
static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3840
    const unsigned char* in, int *inlen) {
3841
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3842
}
3843
static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3844
    const unsigned char* in, int *inlen) {
3845
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3846
}
3847
3848
static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3849
    const unsigned char* in, int *inlen) {
3850
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3851
}
3852
static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3853
    const unsigned char* in, int *inlen) {
3854
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3855
}
3856
3857
static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3858
    const unsigned char* in, int *inlen) {
3859
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3860
}
3861
static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3862
    const unsigned char* in, int *inlen) {
3863
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3864
}
3865
3866
static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3867
    const unsigned char* in, int *inlen) {
3868
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3869
}
3870
static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3871
    const unsigned char* in, int *inlen) {
3872
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3873
}
3874
3875
static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3876
    const unsigned char* in, int *inlen) {
3877
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3878
}
3879
static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3880
    const unsigned char* in, int *inlen) {
3881
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3882
}
3883
3884
static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3885
    const unsigned char* in, int *inlen) {
3886
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3887
}
3888
static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3889
    const unsigned char* in, int *inlen) {
3890
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3891
}
3892
3893
static void
3894
xmlRegisterCharEncodingHandlersISO8859x (void) {
3895
    xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3896
    xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3897
    xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3898
    xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3899
    xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3900
    xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3901
    xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3902
    xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3903
    xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3904
    xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3905
    xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3906
    xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3907
    xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3908
    xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3909
}
3910
3911
#endif
3912
#endif
3913
3914
#define bottom_encoding
3915
#include "elfgcchack.h"