Coverage Report

Created: 2025-08-04 07:15

/src/libxml2-2.9.7/encoding.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * encoding.c : implements the encoding conversion functions needed for XML
3
 *
4
 * Related specs:
5
 * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6
 * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7
 * [ISO-10646]    UTF-8 and UTF-16 in Annexes
8
 * [ISO-8859-1]   ISO Latin-1 characters codes.
9
 * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
10
 *                Worldwide Character Encoding -- Version 1.0", Addison-
11
 *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
12
 *                described in Unicode Technical Report #4.
13
 * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
14
 *                Information Interchange, ANSI X3.4-1986.
15
 *
16
 * See Copyright for the status of this software.
17
 *
18
 * daniel@veillard.com
19
 *
20
 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
21
 */
22
23
#define IN_LIBXML
24
#include "libxml.h"
25
26
#include <string.h>
27
#include <limits.h>
28
29
#ifdef HAVE_CTYPE_H
30
#include <ctype.h>
31
#endif
32
#ifdef HAVE_STDLIB_H
33
#include <stdlib.h>
34
#endif
35
#ifdef LIBXML_ICONV_ENABLED
36
#ifdef HAVE_ERRNO_H
37
#include <errno.h>
38
#endif
39
#endif
40
#include <libxml/encoding.h>
41
#include <libxml/xmlmemory.h>
42
#ifdef LIBXML_HTML_ENABLED
43
#include <libxml/HTMLparser.h>
44
#endif
45
#include <libxml/globals.h>
46
#include <libxml/xmlerror.h>
47
48
#include "buf.h"
49
#include "enc.h"
50
51
static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
52
static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
53
54
typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
55
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
56
struct _xmlCharEncodingAlias {
57
    const char *name;
58
    const char *alias;
59
};
60
61
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
62
static int xmlCharEncodingAliasesNb = 0;
63
static int xmlCharEncodingAliasesMax = 0;
64
65
#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
66
#if 0
67
#define DEBUG_ENCODING  /* Define this to get encoding traces */
68
#endif
69
#else
70
#ifdef LIBXML_ISO8859X_ENABLED
71
static void xmlRegisterCharEncodingHandlersISO8859x (void);
72
#endif
73
#endif
74
75
static int xmlLittleEndian = 1;
76
77
/**
78
 * xmlEncodingErrMemory:
79
 * @extra:  extra informations
80
 *
81
 * Handle an out of memory condition
82
 */
83
static void
84
xmlEncodingErrMemory(const char *extra)
85
0
{
86
0
    __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
87
0
}
88
89
/**
90
 * xmlErrEncoding:
91
 * @error:  the error number
92
 * @msg:  the error message
93
 *
94
 * n encoding error
95
 */
96
static void LIBXML_ATTR_FORMAT(2,0)
97
xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
98
0
{
99
0
    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
100
0
                    XML_FROM_I18N, error, XML_ERR_FATAL,
101
0
                    NULL, 0, val, NULL, NULL, 0, 0, msg, val);
102
0
}
103
104
#ifdef LIBXML_ICU_ENABLED
105
static uconv_t*
106
openIcuConverter(const char* name, int toUnicode)
107
{
108
  UErrorCode status = U_ZERO_ERROR;
109
  uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
110
  if (conv == NULL)
111
    return NULL;
112
113
  conv->uconv = ucnv_open(name, &status);
114
  if (U_FAILURE(status))
115
    goto error;
116
117
  status = U_ZERO_ERROR;
118
  if (toUnicode) {
119
    ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
120
                        NULL, NULL, NULL, &status);
121
  }
122
  else {
123
    ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
124
                        NULL, NULL, NULL, &status);
125
  }
126
  if (U_FAILURE(status))
127
    goto error;
128
129
  status = U_ZERO_ERROR;
130
  conv->utf8 = ucnv_open("UTF-8", &status);
131
  if (U_SUCCESS(status))
132
    return conv;
133
134
error:
135
  if (conv->uconv)
136
    ucnv_close(conv->uconv);
137
  xmlFree(conv);
138
  return NULL;
139
}
140
141
static void
142
closeIcuConverter(uconv_t *conv)
143
{
144
  if (conv != NULL) {
145
    ucnv_close(conv->uconv);
146
    ucnv_close(conv->utf8);
147
    xmlFree(conv);
148
  }
149
}
150
#endif /* LIBXML_ICU_ENABLED */
151
152
/************************************************************************
153
 *                  *
154
 *    Conversions To/From UTF8 encoding     *
155
 *                  *
156
 ************************************************************************/
157
158
/**
159
 * asciiToUTF8:
160
 * @out:  a pointer to an array of bytes to store the result
161
 * @outlen:  the length of @out
162
 * @in:  a pointer to an array of ASCII chars
163
 * @inlen:  the length of @in
164
 *
165
 * Take a block of ASCII chars in and try to convert it to an UTF-8
166
 * block of chars out.
167
 * Returns 0 if success, or -1 otherwise
168
 * The value of @inlen after return is the number of octets consumed
169
 *     if the return value is positive, else unpredictable.
170
 * The value of @outlen after return is the number of octets consumed.
171
 */
172
static int
173
asciiToUTF8(unsigned char* out, int *outlen,
174
0
              const unsigned char* in, int *inlen) {
175
0
    unsigned char* outstart = out;
176
0
    const unsigned char* base = in;
177
0
    const unsigned char* processed = in;
178
0
    unsigned char* outend = out + *outlen;
179
0
    const unsigned char* inend;
180
0
    unsigned int c;
181
182
0
    inend = in + (*inlen);
183
0
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
184
0
  c= *in++;
185
186
0
        if (out >= outend)
187
0
      break;
188
0
        if (c < 0x80) {
189
0
      *out++ = c;
190
0
  } else {
191
0
      *outlen = out - outstart;
192
0
      *inlen = processed - base;
193
0
      return(-1);
194
0
  }
195
196
0
  processed = (const unsigned char*) in;
197
0
    }
198
0
    *outlen = out - outstart;
199
0
    *inlen = processed - base;
200
0
    return(*outlen);
201
0
}
202
203
#ifdef LIBXML_OUTPUT_ENABLED
204
/**
205
 * UTF8Toascii:
206
 * @out:  a pointer to an array of bytes to store the result
207
 * @outlen:  the length of @out
208
 * @in:  a pointer to an array of UTF-8 chars
209
 * @inlen:  the length of @in
210
 *
211
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
212
 * block of chars out.
213
 *
214
 * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
215
 * The value of @inlen after return is the number of octets consumed
216
 *     if the return value is positive, else unpredictable.
217
 * The value of @outlen after return is the number of octets consumed.
218
 */
219
static int
220
UTF8Toascii(unsigned char* out, int *outlen,
221
0
              const unsigned char* in, int *inlen) {
222
0
    const unsigned char* processed = in;
223
0
    const unsigned char* outend;
224
0
    const unsigned char* outstart = out;
225
0
    const unsigned char* instart = in;
226
0
    const unsigned char* inend;
227
0
    unsigned int c, d;
228
0
    int trailing;
229
230
0
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
231
0
    if (in == NULL) {
232
        /*
233
   * initialization nothing to do
234
   */
235
0
  *outlen = 0;
236
0
  *inlen = 0;
237
0
  return(0);
238
0
    }
239
0
    inend = in + (*inlen);
240
0
    outend = out + (*outlen);
241
0
    while (in < inend) {
242
0
  d = *in++;
243
0
  if      (d < 0x80)  { c= d; trailing= 0; }
244
0
  else if (d < 0xC0) {
245
      /* trailing byte in leading position */
246
0
      *outlen = out - outstart;
247
0
      *inlen = processed - instart;
248
0
      return(-2);
249
0
        } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
250
0
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
251
0
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
252
0
  else {
253
      /* no chance for this in Ascii */
254
0
      *outlen = out - outstart;
255
0
      *inlen = processed - instart;
256
0
      return(-2);
257
0
  }
258
259
0
  if (inend - in < trailing) {
260
0
      break;
261
0
  }
262
263
0
  for ( ; trailing; trailing--) {
264
0
      if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
265
0
    break;
266
0
      c <<= 6;
267
0
      c |= d & 0x3F;
268
0
  }
269
270
  /* assertion: c is a single UTF-4 value */
271
0
  if (c < 0x80) {
272
0
      if (out >= outend)
273
0
    break;
274
0
      *out++ = c;
275
0
  } else {
276
      /* no chance for this in Ascii */
277
0
      *outlen = out - outstart;
278
0
      *inlen = processed - instart;
279
0
      return(-2);
280
0
  }
281
0
  processed = in;
282
0
    }
283
0
    *outlen = out - outstart;
284
0
    *inlen = processed - instart;
285
0
    return(*outlen);
286
0
}
287
#endif /* LIBXML_OUTPUT_ENABLED */
288
289
/**
290
 * isolat1ToUTF8:
291
 * @out:  a pointer to an array of bytes to store the result
292
 * @outlen:  the length of @out
293
 * @in:  a pointer to an array of ISO Latin 1 chars
294
 * @inlen:  the length of @in
295
 *
296
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
297
 * block of chars out.
298
 * Returns the number of bytes written if success, or -1 otherwise
299
 * The value of @inlen after return is the number of octets consumed
300
 *     if the return value is positive, else unpredictable.
301
 * The value of @outlen after return is the number of octets consumed.
302
 */
303
int
304
isolat1ToUTF8(unsigned char* out, int *outlen,
305
0
              const unsigned char* in, int *inlen) {
306
0
    unsigned char* outstart = out;
307
0
    const unsigned char* base = in;
308
0
    unsigned char* outend;
309
0
    const unsigned char* inend;
310
0
    const unsigned char* instop;
311
312
0
    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
313
0
  return(-1);
314
315
0
    outend = out + *outlen;
316
0
    inend = in + (*inlen);
317
0
    instop = inend;
318
319
0
    while ((in < inend) && (out < outend - 1)) {
320
0
  if (*in >= 0x80) {
321
0
      *out++ = (((*in) >>  6) & 0x1F) | 0xC0;
322
0
            *out++ = ((*in) & 0x3F) | 0x80;
323
0
      ++in;
324
0
  }
325
0
  if ((instop - in) > (outend - out)) instop = in + (outend - out);
326
0
  while ((in < instop) && (*in < 0x80)) {
327
0
      *out++ = *in++;
328
0
  }
329
0
    }
330
0
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
331
0
        *out++ = *in++;
332
0
    }
333
0
    *outlen = out - outstart;
334
0
    *inlen = in - base;
335
0
    return(*outlen);
336
0
}
337
338
/**
339
 * UTF8ToUTF8:
340
 * @out:  a pointer to an array of bytes to store the result
341
 * @outlen:  the length of @out
342
 * @inb:  a pointer to an array of UTF-8 chars
343
 * @inlenb:  the length of @in in UTF-8 chars
344
 *
345
 * No op copy operation for UTF8 handling.
346
 *
347
 * Returns the number of bytes written, or -1 if lack of space.
348
 *     The value of *inlen after return is the number of octets consumed
349
 *     if the return value is positive, else unpredictable.
350
 */
351
static int
352
UTF8ToUTF8(unsigned char* out, int *outlen,
353
           const unsigned char* inb, int *inlenb)
354
0
{
355
0
    int len;
356
357
0
    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
358
0
  return(-1);
359
0
    if (inb == NULL) {
360
        /* inb == NULL means output is initialized. */
361
0
        *outlen = 0;
362
0
        *inlenb = 0;
363
0
        return(0);
364
0
    }
365
0
    if (*outlen > *inlenb) {
366
0
  len = *inlenb;
367
0
    } else {
368
0
  len = *outlen;
369
0
    }
370
0
    if (len < 0)
371
0
  return(-1);
372
373
0
    memcpy(out, inb, len);
374
375
0
    *outlen = len;
376
0
    *inlenb = len;
377
0
    return(*outlen);
378
0
}
379
380
381
#ifdef LIBXML_OUTPUT_ENABLED
382
/**
383
 * UTF8Toisolat1:
384
 * @out:  a pointer to an array of bytes to store the result
385
 * @outlen:  the length of @out
386
 * @in:  a pointer to an array of UTF-8 chars
387
 * @inlen:  the length of @in
388
 *
389
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
390
 * block of chars out.
391
 *
392
 * Returns the number of bytes written if success, -2 if the transcoding fails,
393
           or -1 otherwise
394
 * The value of @inlen after return is the number of octets consumed
395
 *     if the return value is positive, else unpredictable.
396
 * The value of @outlen after return is the number of octets consumed.
397
 */
398
int
399
UTF8Toisolat1(unsigned char* out, int *outlen,
400
0
              const unsigned char* in, int *inlen) {
401
0
    const unsigned char* processed = in;
402
0
    const unsigned char* outend;
403
0
    const unsigned char* outstart = out;
404
0
    const unsigned char* instart = in;
405
0
    const unsigned char* inend;
406
0
    unsigned int c, d;
407
0
    int trailing;
408
409
0
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
410
0
    if (in == NULL) {
411
        /*
412
   * initialization nothing to do
413
   */
414
0
  *outlen = 0;
415
0
  *inlen = 0;
416
0
  return(0);
417
0
    }
418
0
    inend = in + (*inlen);
419
0
    outend = out + (*outlen);
420
0
    while (in < inend) {
421
0
  d = *in++;
422
0
  if      (d < 0x80)  { c= d; trailing= 0; }
423
0
  else if (d < 0xC0) {
424
      /* trailing byte in leading position */
425
0
      *outlen = out - outstart;
426
0
      *inlen = processed - instart;
427
0
      return(-2);
428
0
        } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
429
0
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
430
0
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
431
0
  else {
432
      /* no chance for this in IsoLat1 */
433
0
      *outlen = out - outstart;
434
0
      *inlen = processed - instart;
435
0
      return(-2);
436
0
  }
437
438
0
  if (inend - in < trailing) {
439
0
      break;
440
0
  }
441
442
0
  for ( ; trailing; trailing--) {
443
0
      if (in >= inend)
444
0
    break;
445
0
      if (((d= *in++) & 0xC0) != 0x80) {
446
0
    *outlen = out - outstart;
447
0
    *inlen = processed - instart;
448
0
    return(-2);
449
0
      }
450
0
      c <<= 6;
451
0
      c |= d & 0x3F;
452
0
  }
453
454
  /* assertion: c is a single UTF-4 value */
455
0
  if (c <= 0xFF) {
456
0
      if (out >= outend)
457
0
    break;
458
0
      *out++ = c;
459
0
  } else {
460
      /* no chance for this in IsoLat1 */
461
0
      *outlen = out - outstart;
462
0
      *inlen = processed - instart;
463
0
      return(-2);
464
0
  }
465
0
  processed = in;
466
0
    }
467
0
    *outlen = out - outstart;
468
0
    *inlen = processed - instart;
469
0
    return(*outlen);
470
0
}
471
#endif /* LIBXML_OUTPUT_ENABLED */
472
473
/**
474
 * UTF16LEToUTF8:
475
 * @out:  a pointer to an array of bytes to store the result
476
 * @outlen:  the length of @out
477
 * @inb:  a pointer to an array of UTF-16LE passwd as a byte array
478
 * @inlenb:  the length of @in in UTF-16LE chars
479
 *
480
 * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8
481
 * block of chars out. This function assumes the endian property
482
 * is the same between the native type of this machine and the
483
 * inputed one.
484
 *
485
 * Returns the number of bytes written, or -1 if lack of space, or -2
486
 *     if the transcoding fails (if *in is not a valid utf16 string)
487
 *     The value of *inlen after return is the number of octets consumed
488
 *     if the return value is positive, else unpredictable.
489
 */
490
static int
491
UTF16LEToUTF8(unsigned char* out, int *outlen,
492
            const unsigned char* inb, int *inlenb)
493
0
{
494
0
    unsigned char* outstart = out;
495
0
    const unsigned char* processed = inb;
496
0
    unsigned char* outend = out + *outlen;
497
0
    unsigned short* in = (unsigned short*) inb;
498
0
    unsigned short* inend;
499
0
    unsigned int c, d, inlen;
500
0
    unsigned char *tmp;
501
0
    int bits;
502
503
0
    if ((*inlenb % 2) == 1)
504
0
        (*inlenb)--;
505
0
    inlen = *inlenb / 2;
506
0
    inend = in + inlen;
507
0
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
508
0
        if (xmlLittleEndian) {
509
0
      c= *in++;
510
0
  } else {
511
0
      tmp = (unsigned char *) in;
512
0
      c = *tmp++;
513
0
      c = c | (((unsigned int)*tmp) << 8);
514
0
      in++;
515
0
  }
516
0
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
517
0
      if (in >= inend) {           /* (in > inend) shouldn't happens */
518
0
    break;
519
0
      }
520
0
      if (xmlLittleEndian) {
521
0
    d = *in++;
522
0
      } else {
523
0
    tmp = (unsigned char *) in;
524
0
    d = *tmp++;
525
0
    d = d | (((unsigned int)*tmp) << 8);
526
0
    in++;
527
0
      }
528
0
            if ((d & 0xFC00) == 0xDC00) {
529
0
                c &= 0x03FF;
530
0
                c <<= 10;
531
0
                c |= d & 0x03FF;
532
0
                c += 0x10000;
533
0
            }
534
0
            else {
535
0
    *outlen = out - outstart;
536
0
    *inlenb = processed - inb;
537
0
          return(-2);
538
0
      }
539
0
        }
540
541
  /* assertion: c is a single UTF-4 value */
542
0
        if (out >= outend)
543
0
      break;
544
0
        if      (c <    0x80) {  *out++=  c;                bits= -6; }
545
0
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
546
0
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
547
0
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
548
549
0
        for ( ; bits >= 0; bits-= 6) {
550
0
            if (out >= outend)
551
0
          break;
552
0
            *out++= ((c >> bits) & 0x3F) | 0x80;
553
0
        }
554
0
  processed = (const unsigned char*) in;
555
0
    }
556
0
    *outlen = out - outstart;
557
0
    *inlenb = processed - inb;
558
0
    return(*outlen);
559
0
}
560
561
#ifdef LIBXML_OUTPUT_ENABLED
562
/**
563
 * UTF8ToUTF16LE:
564
 * @outb:  a pointer to an array of bytes to store the result
565
 * @outlen:  the length of @outb
566
 * @in:  a pointer to an array of UTF-8 chars
567
 * @inlen:  the length of @in
568
 *
569
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
570
 * block of chars out.
571
 *
572
 * Returns the number of bytes written, or -1 if lack of space, or -2
573
 *     if the transcoding failed.
574
 */
575
static int
576
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
577
            const unsigned char* in, int *inlen)
578
0
{
579
0
    unsigned short* out = (unsigned short*) outb;
580
0
    const unsigned char* processed = in;
581
0
    const unsigned char *const instart = in;
582
0
    unsigned short* outstart= out;
583
0
    unsigned short* outend;
584
0
    const unsigned char* inend;
585
0
    unsigned int c, d;
586
0
    int trailing;
587
0
    unsigned char *tmp;
588
0
    unsigned short tmp1, tmp2;
589
590
    /* UTF16LE encoding has no BOM */
591
0
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
592
0
    if (in == NULL) {
593
0
  *outlen = 0;
594
0
  *inlen = 0;
595
0
  return(0);
596
0
    }
597
0
    inend= in + *inlen;
598
0
    outend = out + (*outlen / 2);
599
0
    while (in < inend) {
600
0
      d= *in++;
601
0
      if      (d < 0x80)  { c= d; trailing= 0; }
602
0
      else if (d < 0xC0) {
603
          /* trailing byte in leading position */
604
0
    *outlen = (out - outstart) * 2;
605
0
    *inlen = processed - instart;
606
0
    return(-2);
607
0
      } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
608
0
      else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
609
0
      else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
610
0
      else {
611
  /* no chance for this in UTF-16 */
612
0
  *outlen = (out - outstart) * 2;
613
0
  *inlen = processed - instart;
614
0
  return(-2);
615
0
      }
616
617
0
      if (inend - in < trailing) {
618
0
          break;
619
0
      }
620
621
0
      for ( ; trailing; trailing--) {
622
0
          if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
623
0
        break;
624
0
          c <<= 6;
625
0
          c |= d & 0x3F;
626
0
      }
627
628
      /* assertion: c is a single UTF-4 value */
629
0
        if (c < 0x10000) {
630
0
            if (out >= outend)
631
0
          break;
632
0
      if (xmlLittleEndian) {
633
0
    *out++ = c;
634
0
      } else {
635
0
    tmp = (unsigned char *) out;
636
0
    *tmp = c ;
637
0
    *(tmp + 1) = c >> 8 ;
638
0
    out++;
639
0
      }
640
0
        }
641
0
        else if (c < 0x110000) {
642
0
            if (out+1 >= outend)
643
0
          break;
644
0
            c -= 0x10000;
645
0
      if (xmlLittleEndian) {
646
0
    *out++ = 0xD800 | (c >> 10);
647
0
    *out++ = 0xDC00 | (c & 0x03FF);
648
0
      } else {
649
0
    tmp1 = 0xD800 | (c >> 10);
650
0
    tmp = (unsigned char *) out;
651
0
    *tmp = (unsigned char) tmp1;
652
0
    *(tmp + 1) = tmp1 >> 8;
653
0
    out++;
654
655
0
    tmp2 = 0xDC00 | (c & 0x03FF);
656
0
    tmp = (unsigned char *) out;
657
0
    *tmp  = (unsigned char) tmp2;
658
0
    *(tmp + 1) = tmp2 >> 8;
659
0
    out++;
660
0
      }
661
0
        }
662
0
        else
663
0
      break;
664
0
  processed = in;
665
0
    }
666
0
    *outlen = (out - outstart) * 2;
667
0
    *inlen = processed - instart;
668
0
    return(*outlen);
669
0
}
670
671
/**
672
 * UTF8ToUTF16:
673
 * @outb:  a pointer to an array of bytes to store the result
674
 * @outlen:  the length of @outb
675
 * @in:  a pointer to an array of UTF-8 chars
676
 * @inlen:  the length of @in
677
 *
678
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
679
 * block of chars out.
680
 *
681
 * Returns the number of bytes written, or -1 if lack of space, or -2
682
 *     if the transcoding failed.
683
 */
684
static int
685
UTF8ToUTF16(unsigned char* outb, int *outlen,
686
            const unsigned char* in, int *inlen)
687
0
{
688
0
    if (in == NULL) {
689
  /*
690
   * initialization, add the Byte Order Mark for UTF-16LE
691
   */
692
0
        if (*outlen >= 2) {
693
0
      outb[0] = 0xFF;
694
0
      outb[1] = 0xFE;
695
0
      *outlen = 2;
696
0
      *inlen = 0;
697
#ifdef DEBUG_ENCODING
698
            xmlGenericError(xmlGenericErrorContext,
699
        "Added FFFE Byte Order Mark\n");
700
#endif
701
0
      return(2);
702
0
  }
703
0
  *outlen = 0;
704
0
  *inlen = 0;
705
0
  return(0);
706
0
    }
707
0
    return (UTF8ToUTF16LE(outb, outlen, in, inlen));
708
0
}
709
#endif /* LIBXML_OUTPUT_ENABLED */
710
711
/**
712
 * UTF16BEToUTF8:
713
 * @out:  a pointer to an array of bytes to store the result
714
 * @outlen:  the length of @out
715
 * @inb:  a pointer to an array of UTF-16 passed as a byte array
716
 * @inlenb:  the length of @in in UTF-16 chars
717
 *
718
 * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
719
 * block of chars out. This function assumes the endian property
720
 * is the same between the native type of this machine and the
721
 * inputed one.
722
 *
723
 * Returns the number of bytes written, or -1 if lack of space, or -2
724
 *     if the transcoding fails (if *in is not a valid utf16 string)
725
 * The value of *inlen after return is the number of octets consumed
726
 *     if the return value is positive, else unpredictable.
727
 */
728
static int
729
UTF16BEToUTF8(unsigned char* out, int *outlen,
730
            const unsigned char* inb, int *inlenb)
731
0
{
732
0
    unsigned char* outstart = out;
733
0
    const unsigned char* processed = inb;
734
0
    unsigned char* outend = out + *outlen;
735
0
    unsigned short* in = (unsigned short*) inb;
736
0
    unsigned short* inend;
737
0
    unsigned int c, d, inlen;
738
0
    unsigned char *tmp;
739
0
    int bits;
740
741
0
    if ((*inlenb % 2) == 1)
742
0
        (*inlenb)--;
743
0
    inlen = *inlenb / 2;
744
0
    inend= in + inlen;
745
0
    while (in < inend) {
746
0
  if (xmlLittleEndian) {
747
0
      tmp = (unsigned char *) in;
748
0
      c = *tmp++;
749
0
      c = c << 8;
750
0
      c = c | (unsigned int) *tmp;
751
0
      in++;
752
0
  } else {
753
0
      c= *in++;
754
0
  }
755
0
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
756
0
      if (in >= inend) {           /* (in > inend) shouldn't happens */
757
0
    *outlen = out - outstart;
758
0
    *inlenb = processed - inb;
759
0
          return(-2);
760
0
      }
761
0
      if (xmlLittleEndian) {
762
0
    tmp = (unsigned char *) in;
763
0
    d = *tmp++;
764
0
    d = d << 8;
765
0
    d = d | (unsigned int) *tmp;
766
0
    in++;
767
0
      } else {
768
0
    d= *in++;
769
0
      }
770
0
            if ((d & 0xFC00) == 0xDC00) {
771
0
                c &= 0x03FF;
772
0
                c <<= 10;
773
0
                c |= d & 0x03FF;
774
0
                c += 0x10000;
775
0
            }
776
0
            else {
777
0
    *outlen = out - outstart;
778
0
    *inlenb = processed - inb;
779
0
          return(-2);
780
0
      }
781
0
        }
782
783
  /* assertion: c is a single UTF-4 value */
784
0
        if (out >= outend)
785
0
      break;
786
0
        if      (c <    0x80) {  *out++=  c;                bits= -6; }
787
0
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
788
0
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
789
0
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
790
791
0
        for ( ; bits >= 0; bits-= 6) {
792
0
            if (out >= outend)
793
0
          break;
794
0
            *out++= ((c >> bits) & 0x3F) | 0x80;
795
0
        }
796
0
  processed = (const unsigned char*) in;
797
0
    }
798
0
    *outlen = out - outstart;
799
0
    *inlenb = processed - inb;
800
0
    return(*outlen);
801
0
}
802
803
#ifdef LIBXML_OUTPUT_ENABLED
804
/**
805
 * UTF8ToUTF16BE:
806
 * @outb:  a pointer to an array of bytes to store the result
807
 * @outlen:  the length of @outb
808
 * @in:  a pointer to an array of UTF-8 chars
809
 * @inlen:  the length of @in
810
 *
811
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
812
 * block of chars out.
813
 *
814
 * Returns the number of byte written, or -1 by lack of space, or -2
815
 *     if the transcoding failed.
816
 */
817
static int
818
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
819
            const unsigned char* in, int *inlen)
820
0
{
821
0
    unsigned short* out = (unsigned short*) outb;
822
0
    const unsigned char* processed = in;
823
0
    const unsigned char *const instart = in;
824
0
    unsigned short* outstart= out;
825
0
    unsigned short* outend;
826
0
    const unsigned char* inend;
827
0
    unsigned int c, d;
828
0
    int trailing;
829
0
    unsigned char *tmp;
830
0
    unsigned short tmp1, tmp2;
831
832
    /* UTF-16BE has no BOM */
833
0
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
834
0
    if (in == NULL) {
835
0
  *outlen = 0;
836
0
  *inlen = 0;
837
0
  return(0);
838
0
    }
839
0
    inend= in + *inlen;
840
0
    outend = out + (*outlen / 2);
841
0
    while (in < inend) {
842
0
      d= *in++;
843
0
      if      (d < 0x80)  { c= d; trailing= 0; }
844
0
      else if (d < 0xC0)  {
845
          /* trailing byte in leading position */
846
0
    *outlen = out - outstart;
847
0
    *inlen = processed - instart;
848
0
    return(-2);
849
0
      } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
850
0
      else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
851
0
      else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
852
0
      else {
853
          /* no chance for this in UTF-16 */
854
0
    *outlen = out - outstart;
855
0
    *inlen = processed - instart;
856
0
    return(-2);
857
0
      }
858
859
0
      if (inend - in < trailing) {
860
0
          break;
861
0
      }
862
863
0
      for ( ; trailing; trailing--) {
864
0
          if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))  break;
865
0
          c <<= 6;
866
0
          c |= d & 0x3F;
867
0
      }
868
869
      /* assertion: c is a single UTF-4 value */
870
0
        if (c < 0x10000) {
871
0
            if (out >= outend)  break;
872
0
      if (xmlLittleEndian) {
873
0
    tmp = (unsigned char *) out;
874
0
    *tmp = c >> 8;
875
0
    *(tmp + 1) = c;
876
0
    out++;
877
0
      } else {
878
0
    *out++ = c;
879
0
      }
880
0
        }
881
0
        else if (c < 0x110000) {
882
0
            if (out+1 >= outend)  break;
883
0
            c -= 0x10000;
884
0
      if (xmlLittleEndian) {
885
0
    tmp1 = 0xD800 | (c >> 10);
886
0
    tmp = (unsigned char *) out;
887
0
    *tmp = tmp1 >> 8;
888
0
    *(tmp + 1) = (unsigned char) tmp1;
889
0
    out++;
890
891
0
    tmp2 = 0xDC00 | (c & 0x03FF);
892
0
    tmp = (unsigned char *) out;
893
0
    *tmp = tmp2 >> 8;
894
0
    *(tmp + 1) = (unsigned char) tmp2;
895
0
    out++;
896
0
      } else {
897
0
    *out++ = 0xD800 | (c >> 10);
898
0
    *out++ = 0xDC00 | (c & 0x03FF);
899
0
      }
900
0
        }
901
0
        else
902
0
      break;
903
0
  processed = in;
904
0
    }
905
0
    *outlen = (out - outstart) * 2;
906
0
    *inlen = processed - instart;
907
0
    return(*outlen);
908
0
}
909
#endif /* LIBXML_OUTPUT_ENABLED */
910
911
/************************************************************************
912
 *                  *
913
 *    Generic encoding handling routines      *
914
 *                  *
915
 ************************************************************************/
916
917
/**
918
 * xmlDetectCharEncoding:
919
 * @in:  a pointer to the first bytes of the XML entity, must be at least
920
 *       2 bytes long (at least 4 if encoding is UTF4 variant).
921
 * @len:  pointer to the length of the buffer
922
 *
923
 * Guess the encoding of the entity using the first bytes of the entity content
924
 * according to the non-normative appendix F of the XML-1.0 recommendation.
925
 *
926
 * Returns one of the XML_CHAR_ENCODING_... values.
927
 */
928
xmlCharEncoding
929
xmlDetectCharEncoding(const unsigned char* in, int len)
930
0
{
931
0
    if (in == NULL)
932
0
        return(XML_CHAR_ENCODING_NONE);
933
0
    if (len >= 4) {
934
0
  if ((in[0] == 0x00) && (in[1] == 0x00) &&
935
0
      (in[2] == 0x00) && (in[3] == 0x3C))
936
0
      return(XML_CHAR_ENCODING_UCS4BE);
937
0
  if ((in[0] == 0x3C) && (in[1] == 0x00) &&
938
0
      (in[2] == 0x00) && (in[3] == 0x00))
939
0
      return(XML_CHAR_ENCODING_UCS4LE);
940
0
  if ((in[0] == 0x00) && (in[1] == 0x00) &&
941
0
      (in[2] == 0x3C) && (in[3] == 0x00))
942
0
      return(XML_CHAR_ENCODING_UCS4_2143);
943
0
  if ((in[0] == 0x00) && (in[1] == 0x3C) &&
944
0
      (in[2] == 0x00) && (in[3] == 0x00))
945
0
      return(XML_CHAR_ENCODING_UCS4_3412);
946
0
  if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
947
0
      (in[2] == 0xA7) && (in[3] == 0x94))
948
0
      return(XML_CHAR_ENCODING_EBCDIC);
949
0
  if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
950
0
      (in[2] == 0x78) && (in[3] == 0x6D))
951
0
      return(XML_CHAR_ENCODING_UTF8);
952
  /*
953
   * Although not part of the recommendation, we also
954
   * attempt an "auto-recognition" of UTF-16LE and
955
   * UTF-16BE encodings.
956
   */
957
0
  if ((in[0] == 0x3C) && (in[1] == 0x00) &&
958
0
      (in[2] == 0x3F) && (in[3] == 0x00))
959
0
      return(XML_CHAR_ENCODING_UTF16LE);
960
0
  if ((in[0] == 0x00) && (in[1] == 0x3C) &&
961
0
      (in[2] == 0x00) && (in[3] == 0x3F))
962
0
      return(XML_CHAR_ENCODING_UTF16BE);
963
0
    }
964
0
    if (len >= 3) {
965
  /*
966
   * Errata on XML-1.0 June 20 2001
967
   * We now allow an UTF8 encoded BOM
968
   */
969
0
  if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
970
0
      (in[2] == 0xBF))
971
0
      return(XML_CHAR_ENCODING_UTF8);
972
0
    }
973
    /* For UTF-16 we can recognize by the BOM */
974
0
    if (len >= 2) {
975
0
  if ((in[0] == 0xFE) && (in[1] == 0xFF))
976
0
      return(XML_CHAR_ENCODING_UTF16BE);
977
0
  if ((in[0] == 0xFF) && (in[1] == 0xFE))
978
0
      return(XML_CHAR_ENCODING_UTF16LE);
979
0
    }
980
0
    return(XML_CHAR_ENCODING_NONE);
981
0
}
982
983
/**
984
 * xmlCleanupEncodingAliases:
985
 *
986
 * Unregisters all aliases
987
 */
988
void
989
0
xmlCleanupEncodingAliases(void) {
990
0
    int i;
991
992
0
    if (xmlCharEncodingAliases == NULL)
993
0
  return;
994
995
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
996
0
  if (xmlCharEncodingAliases[i].name != NULL)
997
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
998
0
  if (xmlCharEncodingAliases[i].alias != NULL)
999
0
      xmlFree((char *) xmlCharEncodingAliases[i].alias);
1000
0
    }
1001
0
    xmlCharEncodingAliasesNb = 0;
1002
0
    xmlCharEncodingAliasesMax = 0;
1003
0
    xmlFree(xmlCharEncodingAliases);
1004
0
    xmlCharEncodingAliases = NULL;
1005
0
}
1006
1007
/**
1008
 * xmlGetEncodingAlias:
1009
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1010
 *
1011
 * Lookup an encoding name for the given alias.
1012
 *
1013
 * Returns NULL if not found, otherwise the original name
1014
 */
1015
const char *
1016
112
xmlGetEncodingAlias(const char *alias) {
1017
112
    int i;
1018
112
    char upper[100];
1019
1020
112
    if (alias == NULL)
1021
0
  return(NULL);
1022
1023
112
    if (xmlCharEncodingAliases == NULL)
1024
112
  return(NULL);
1025
1026
0
    for (i = 0;i < 99;i++) {
1027
0
        upper[i] = toupper(alias[i]);
1028
0
  if (upper[i] == 0) break;
1029
0
    }
1030
0
    upper[i] = 0;
1031
1032
    /*
1033
     * Walk down the list looking for a definition of the alias
1034
     */
1035
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1036
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1037
0
      return(xmlCharEncodingAliases[i].name);
1038
0
  }
1039
0
    }
1040
0
    return(NULL);
1041
0
}
1042
1043
/**
1044
 * xmlAddEncodingAlias:
1045
 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1046
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1047
 *
1048
 * Registers an alias @alias for an encoding named @name. Existing alias
1049
 * will be overwritten.
1050
 *
1051
 * Returns 0 in case of success, -1 in case of error
1052
 */
1053
int
1054
0
xmlAddEncodingAlias(const char *name, const char *alias) {
1055
0
    int i;
1056
0
    char upper[100];
1057
1058
0
    if ((name == NULL) || (alias == NULL))
1059
0
  return(-1);
1060
1061
0
    for (i = 0;i < 99;i++) {
1062
0
        upper[i] = toupper(alias[i]);
1063
0
  if (upper[i] == 0) break;
1064
0
    }
1065
0
    upper[i] = 0;
1066
1067
0
    if (xmlCharEncodingAliases == NULL) {
1068
0
  xmlCharEncodingAliasesNb = 0;
1069
0
  xmlCharEncodingAliasesMax = 20;
1070
0
  xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1071
0
        xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1072
0
  if (xmlCharEncodingAliases == NULL)
1073
0
      return(-1);
1074
0
    } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1075
0
  xmlCharEncodingAliasesMax *= 2;
1076
0
  xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1077
0
        xmlRealloc(xmlCharEncodingAliases,
1078
0
             xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1079
0
    }
1080
    /*
1081
     * Walk down the list looking for a definition of the alias
1082
     */
1083
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1084
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1085
      /*
1086
       * Replace the definition.
1087
       */
1088
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1089
0
      xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1090
0
      return(0);
1091
0
  }
1092
0
    }
1093
    /*
1094
     * Add the definition
1095
     */
1096
0
    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1097
0
    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1098
0
    xmlCharEncodingAliasesNb++;
1099
0
    return(0);
1100
0
}
1101
1102
/**
1103
 * xmlDelEncodingAlias:
1104
 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1105
 *
1106
 * Unregisters an encoding alias @alias
1107
 *
1108
 * Returns 0 in case of success, -1 in case of error
1109
 */
1110
int
1111
0
xmlDelEncodingAlias(const char *alias) {
1112
0
    int i;
1113
1114
0
    if (alias == NULL)
1115
0
  return(-1);
1116
1117
0
    if (xmlCharEncodingAliases == NULL)
1118
0
  return(-1);
1119
    /*
1120
     * Walk down the list looking for a definition of the alias
1121
     */
1122
0
    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1123
0
  if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1124
0
      xmlFree((char *) xmlCharEncodingAliases[i].name);
1125
0
      xmlFree((char *) xmlCharEncodingAliases[i].alias);
1126
0
      xmlCharEncodingAliasesNb--;
1127
0
      memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1128
0
        sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1129
0
      return(0);
1130
0
  }
1131
0
    }
1132
0
    return(-1);
1133
0
}
1134
1135
/**
1136
 * xmlParseCharEncoding:
1137
 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1138
 *
1139
 * Compare the string to the encoding schemes already known. Note
1140
 * that the comparison is case insensitive accordingly to the section
1141
 * [XML] 4.3.3 Character Encoding in Entities.
1142
 *
1143
 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1144
 * if not recognized.
1145
 */
1146
xmlCharEncoding
1147
xmlParseCharEncoding(const char* name)
1148
0
{
1149
0
    const char *alias;
1150
0
    char upper[500];
1151
0
    int i;
1152
1153
0
    if (name == NULL)
1154
0
  return(XML_CHAR_ENCODING_NONE);
1155
1156
    /*
1157
     * Do the alias resolution
1158
     */
1159
0
    alias = xmlGetEncodingAlias(name);
1160
0
    if (alias != NULL)
1161
0
  name = alias;
1162
1163
0
    for (i = 0;i < 499;i++) {
1164
0
        upper[i] = toupper(name[i]);
1165
0
  if (upper[i] == 0) break;
1166
0
    }
1167
0
    upper[i] = 0;
1168
1169
0
    if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1170
0
    if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1171
0
    if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1172
1173
    /*
1174
     * NOTE: if we were able to parse this, the endianness of UTF16 is
1175
     *       already found and in use
1176
     */
1177
0
    if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1178
0
    if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1179
1180
0
    if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1181
0
    if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1182
0
    if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1183
1184
    /*
1185
     * NOTE: if we were able to parse this, the endianness of UCS4 is
1186
     *       already found and in use
1187
     */
1188
0
    if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1189
0
    if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1190
0
    if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1191
1192
1193
0
    if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1194
0
    if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1195
0
    if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1196
1197
0
    if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1198
0
    if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1199
0
    if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1200
1201
0
    if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1202
0
    if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1203
0
    if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1204
0
    if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1205
0
    if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1206
0
    if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1207
0
    if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1208
1209
0
    if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1210
0
    if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1211
0
    if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1212
1213
#ifdef DEBUG_ENCODING
1214
    xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1215
#endif
1216
0
    return(XML_CHAR_ENCODING_ERROR);
1217
0
}
1218
1219
/**
1220
 * xmlGetCharEncodingName:
1221
 * @enc:  the encoding
1222
 *
1223
 * The "canonical" name for XML encoding.
1224
 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1225
 * Section 4.3.3  Character Encoding in Entities
1226
 *
1227
 * Returns the canonical name for the given encoding
1228
 */
1229
1230
const char*
1231
0
xmlGetCharEncodingName(xmlCharEncoding enc) {
1232
0
    switch (enc) {
1233
0
        case XML_CHAR_ENCODING_ERROR:
1234
0
      return(NULL);
1235
0
        case XML_CHAR_ENCODING_NONE:
1236
0
      return(NULL);
1237
0
        case XML_CHAR_ENCODING_UTF8:
1238
0
      return("UTF-8");
1239
0
        case XML_CHAR_ENCODING_UTF16LE:
1240
0
      return("UTF-16");
1241
0
        case XML_CHAR_ENCODING_UTF16BE:
1242
0
      return("UTF-16");
1243
0
        case XML_CHAR_ENCODING_EBCDIC:
1244
0
            return("EBCDIC");
1245
0
        case XML_CHAR_ENCODING_UCS4LE:
1246
0
            return("ISO-10646-UCS-4");
1247
0
        case XML_CHAR_ENCODING_UCS4BE:
1248
0
            return("ISO-10646-UCS-4");
1249
0
        case XML_CHAR_ENCODING_UCS4_2143:
1250
0
            return("ISO-10646-UCS-4");
1251
0
        case XML_CHAR_ENCODING_UCS4_3412:
1252
0
            return("ISO-10646-UCS-4");
1253
0
        case XML_CHAR_ENCODING_UCS2:
1254
0
            return("ISO-10646-UCS-2");
1255
0
        case XML_CHAR_ENCODING_8859_1:
1256
0
      return("ISO-8859-1");
1257
0
        case XML_CHAR_ENCODING_8859_2:
1258
0
      return("ISO-8859-2");
1259
0
        case XML_CHAR_ENCODING_8859_3:
1260
0
      return("ISO-8859-3");
1261
0
        case XML_CHAR_ENCODING_8859_4:
1262
0
      return("ISO-8859-4");
1263
0
        case XML_CHAR_ENCODING_8859_5:
1264
0
      return("ISO-8859-5");
1265
0
        case XML_CHAR_ENCODING_8859_6:
1266
0
      return("ISO-8859-6");
1267
0
        case XML_CHAR_ENCODING_8859_7:
1268
0
      return("ISO-8859-7");
1269
0
        case XML_CHAR_ENCODING_8859_8:
1270
0
      return("ISO-8859-8");
1271
0
        case XML_CHAR_ENCODING_8859_9:
1272
0
      return("ISO-8859-9");
1273
0
        case XML_CHAR_ENCODING_2022_JP:
1274
0
            return("ISO-2022-JP");
1275
0
        case XML_CHAR_ENCODING_SHIFT_JIS:
1276
0
            return("Shift-JIS");
1277
0
        case XML_CHAR_ENCODING_EUC_JP:
1278
0
            return("EUC-JP");
1279
0
  case XML_CHAR_ENCODING_ASCII:
1280
0
      return(NULL);
1281
0
    }
1282
0
    return(NULL);
1283
0
}
1284
1285
/************************************************************************
1286
 *                  *
1287
 *      Char encoding handlers        *
1288
 *                  *
1289
 ************************************************************************/
1290
1291
1292
/* the size should be growable, but it's not a big deal ... */
1293
126
#define MAX_ENCODING_HANDLERS 50
1294
static xmlCharEncodingHandlerPtr *handlers = NULL;
1295
static int nbCharEncodingHandler = 0;
1296
1297
/*
1298
 * The default is UTF-8 for XML, that's also the default used for the
1299
 * parser internals, so the default encoding handler is NULL
1300
 */
1301
1302
static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1303
1304
/**
1305
 * xmlNewCharEncodingHandler:
1306
 * @name:  the encoding name, in UTF-8 format (ASCII actually)
1307
 * @input:  the xmlCharEncodingInputFunc to read that encoding
1308
 * @output:  the xmlCharEncodingOutputFunc to write that encoding
1309
 *
1310
 * Create and registers an xmlCharEncodingHandler.
1311
 *
1312
 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1313
 */
1314
xmlCharEncodingHandlerPtr
1315
xmlNewCharEncodingHandler(const char *name,
1316
                          xmlCharEncodingInputFunc input,
1317
112
                          xmlCharEncodingOutputFunc output) {
1318
112
    xmlCharEncodingHandlerPtr handler;
1319
112
    const char *alias;
1320
112
    char upper[500];
1321
112
    int i;
1322
112
    char *up = NULL;
1323
1324
    /*
1325
     * Do the alias resolution
1326
     */
1327
112
    alias = xmlGetEncodingAlias(name);
1328
112
    if (alias != NULL)
1329
0
  name = alias;
1330
1331
    /*
1332
     * Keep only the uppercase version of the encoding.
1333
     */
1334
112
    if (name == NULL) {
1335
0
        xmlEncodingErr(XML_I18N_NO_NAME,
1336
0
           "xmlNewCharEncodingHandler : no name !\n", NULL);
1337
0
  return(NULL);
1338
0
    }
1339
868
    for (i = 0;i < 499;i++) {
1340
868
        upper[i] = toupper(name[i]);
1341
868
  if (upper[i] == 0) break;
1342
868
    }
1343
112
    upper[i] = 0;
1344
112
    up = xmlMemStrdup(upper);
1345
112
    if (up == NULL) {
1346
0
        xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1347
0
  return(NULL);
1348
0
    }
1349
1350
    /*
1351
     * allocate and fill-up an handler block.
1352
     */
1353
112
    handler = (xmlCharEncodingHandlerPtr)
1354
112
              xmlMalloc(sizeof(xmlCharEncodingHandler));
1355
112
    if (handler == NULL) {
1356
0
        xmlFree(up);
1357
0
        xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1358
0
  return(NULL);
1359
0
    }
1360
112
    memset(handler, 0, sizeof(xmlCharEncodingHandler));
1361
112
    handler->input = input;
1362
112
    handler->output = output;
1363
112
    handler->name = up;
1364
1365
112
#ifdef LIBXML_ICONV_ENABLED
1366
112
    handler->iconv_in = NULL;
1367
112
    handler->iconv_out = NULL;
1368
112
#endif
1369
#ifdef LIBXML_ICU_ENABLED
1370
    handler->uconv_in = NULL;
1371
    handler->uconv_out = NULL;
1372
#endif
1373
1374
    /*
1375
     * registers and returns the handler.
1376
     */
1377
112
    xmlRegisterCharEncodingHandler(handler);
1378
#ifdef DEBUG_ENCODING
1379
    xmlGenericError(xmlGenericErrorContext,
1380
      "Registered encoding handler for %s\n", name);
1381
#endif
1382
112
    return(handler);
1383
112
}
1384
1385
/**
1386
 * xmlInitCharEncodingHandlers:
1387
 *
1388
 * Initialize the char encoding support, it registers the default
1389
 * encoding supported.
1390
 * NOTE: while public, this function usually doesn't need to be called
1391
 *       in normal processing.
1392
 */
1393
void
1394
14
xmlInitCharEncodingHandlers(void) {
1395
14
    unsigned short int tst = 0x1234;
1396
14
    unsigned char *ptr = (unsigned char *) &tst;
1397
1398
14
    if (handlers != NULL) return;
1399
1400
14
    handlers = (xmlCharEncodingHandlerPtr *)
1401
14
        xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1402
1403
14
    if (*ptr == 0x12) xmlLittleEndian = 0;
1404
14
    else if (*ptr == 0x34) xmlLittleEndian = 1;
1405
0
    else {
1406
0
        xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1407
0
                 "Odd problem at endianness detection\n", NULL);
1408
0
    }
1409
1410
14
    if (handlers == NULL) {
1411
0
        xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
1412
0
  return;
1413
0
    }
1414
14
    xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
1415
14
#ifdef LIBXML_OUTPUT_ENABLED
1416
14
    xmlUTF16LEHandler =
1417
14
          xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1418
14
    xmlUTF16BEHandler =
1419
14
          xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
1420
14
    xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
1421
14
    xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1422
14
    xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
1423
14
    xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
1424
14
#ifdef LIBXML_HTML_ENABLED
1425
14
    xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1426
14
#endif
1427
#else
1428
    xmlUTF16LEHandler =
1429
          xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1430
    xmlUTF16BEHandler =
1431
          xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
1432
    xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
1433
    xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1434
    xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1435
    xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1436
#endif /* LIBXML_OUTPUT_ENABLED */
1437
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
1438
#ifdef LIBXML_ISO8859X_ENABLED
1439
    xmlRegisterCharEncodingHandlersISO8859x ();
1440
#endif
1441
#endif
1442
1443
14
}
1444
1445
/**
1446
 * xmlCleanupCharEncodingHandlers:
1447
 *
1448
 * Cleanup the memory allocated for the char encoding support, it
1449
 * unregisters all the encoding handlers and the aliases.
1450
 */
1451
void
1452
0
xmlCleanupCharEncodingHandlers(void) {
1453
0
    xmlCleanupEncodingAliases();
1454
1455
0
    if (handlers == NULL) return;
1456
1457
0
    for (;nbCharEncodingHandler > 0;) {
1458
0
        nbCharEncodingHandler--;
1459
0
  if (handlers[nbCharEncodingHandler] != NULL) {
1460
0
      if (handlers[nbCharEncodingHandler]->name != NULL)
1461
0
    xmlFree(handlers[nbCharEncodingHandler]->name);
1462
0
      xmlFree(handlers[nbCharEncodingHandler]);
1463
0
  }
1464
0
    }
1465
0
    xmlFree(handlers);
1466
0
    handlers = NULL;
1467
0
    nbCharEncodingHandler = 0;
1468
0
    xmlDefaultCharEncodingHandler = NULL;
1469
0
}
1470
1471
/**
1472
 * xmlRegisterCharEncodingHandler:
1473
 * @handler:  the xmlCharEncodingHandlerPtr handler block
1474
 *
1475
 * Register the char encoding handler, surprising, isn't it ?
1476
 */
1477
void
1478
112
xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1479
112
    if (handlers == NULL) xmlInitCharEncodingHandlers();
1480
112
    if ((handler == NULL) || (handlers == NULL)) {
1481
0
        xmlEncodingErr(XML_I18N_NO_HANDLER,
1482
0
    "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
1483
0
  return;
1484
0
    }
1485
1486
112
    if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1487
0
        xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1488
0
  "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1489
0
                 "MAX_ENCODING_HANDLERS");
1490
0
  return;
1491
0
    }
1492
112
    handlers[nbCharEncodingHandler++] = handler;
1493
112
}
1494
1495
/**
1496
 * xmlGetCharEncodingHandler:
1497
 * @enc:  an xmlCharEncoding value.
1498
 *
1499
 * Search in the registered set the handler able to read/write that encoding.
1500
 *
1501
 * Returns the handler or NULL if not found
1502
 */
1503
xmlCharEncodingHandlerPtr
1504
0
xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1505
0
    xmlCharEncodingHandlerPtr handler;
1506
1507
0
    if (handlers == NULL) xmlInitCharEncodingHandlers();
1508
0
    switch (enc) {
1509
0
        case XML_CHAR_ENCODING_ERROR:
1510
0
      return(NULL);
1511
0
        case XML_CHAR_ENCODING_NONE:
1512
0
      return(NULL);
1513
0
        case XML_CHAR_ENCODING_UTF8:
1514
0
      return(NULL);
1515
0
        case XML_CHAR_ENCODING_UTF16LE:
1516
0
      return(xmlUTF16LEHandler);
1517
0
        case XML_CHAR_ENCODING_UTF16BE:
1518
0
      return(xmlUTF16BEHandler);
1519
0
        case XML_CHAR_ENCODING_EBCDIC:
1520
0
            handler = xmlFindCharEncodingHandler("EBCDIC");
1521
0
            if (handler != NULL) return(handler);
1522
0
            handler = xmlFindCharEncodingHandler("ebcdic");
1523
0
            if (handler != NULL) return(handler);
1524
0
            handler = xmlFindCharEncodingHandler("EBCDIC-US");
1525
0
            if (handler != NULL) return(handler);
1526
0
            handler = xmlFindCharEncodingHandler("IBM-037");
1527
0
            if (handler != NULL) return(handler);
1528
0
      break;
1529
0
        case XML_CHAR_ENCODING_UCS4BE:
1530
0
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1531
0
            if (handler != NULL) return(handler);
1532
0
            handler = xmlFindCharEncodingHandler("UCS-4");
1533
0
            if (handler != NULL) return(handler);
1534
0
            handler = xmlFindCharEncodingHandler("UCS4");
1535
0
            if (handler != NULL) return(handler);
1536
0
      break;
1537
0
        case XML_CHAR_ENCODING_UCS4LE:
1538
0
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1539
0
            if (handler != NULL) return(handler);
1540
0
            handler = xmlFindCharEncodingHandler("UCS-4");
1541
0
            if (handler != NULL) return(handler);
1542
0
            handler = xmlFindCharEncodingHandler("UCS4");
1543
0
            if (handler != NULL) return(handler);
1544
0
      break;
1545
0
        case XML_CHAR_ENCODING_UCS4_2143:
1546
0
      break;
1547
0
        case XML_CHAR_ENCODING_UCS4_3412:
1548
0
      break;
1549
0
        case XML_CHAR_ENCODING_UCS2:
1550
0
            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1551
0
            if (handler != NULL) return(handler);
1552
0
            handler = xmlFindCharEncodingHandler("UCS-2");
1553
0
            if (handler != NULL) return(handler);
1554
0
            handler = xmlFindCharEncodingHandler("UCS2");
1555
0
            if (handler != NULL) return(handler);
1556
0
      break;
1557
1558
      /*
1559
       * We used to keep ISO Latin encodings native in the
1560
       * generated data. This led to so many problems that
1561
       * this has been removed. One can still change this
1562
       * back by registering no-ops encoders for those
1563
       */
1564
0
        case XML_CHAR_ENCODING_8859_1:
1565
0
      handler = xmlFindCharEncodingHandler("ISO-8859-1");
1566
0
      if (handler != NULL) return(handler);
1567
0
      break;
1568
0
        case XML_CHAR_ENCODING_8859_2:
1569
0
      handler = xmlFindCharEncodingHandler("ISO-8859-2");
1570
0
      if (handler != NULL) return(handler);
1571
0
      break;
1572
0
        case XML_CHAR_ENCODING_8859_3:
1573
0
      handler = xmlFindCharEncodingHandler("ISO-8859-3");
1574
0
      if (handler != NULL) return(handler);
1575
0
      break;
1576
0
        case XML_CHAR_ENCODING_8859_4:
1577
0
      handler = xmlFindCharEncodingHandler("ISO-8859-4");
1578
0
      if (handler != NULL) return(handler);
1579
0
      break;
1580
0
        case XML_CHAR_ENCODING_8859_5:
1581
0
      handler = xmlFindCharEncodingHandler("ISO-8859-5");
1582
0
      if (handler != NULL) return(handler);
1583
0
      break;
1584
0
        case XML_CHAR_ENCODING_8859_6:
1585
0
      handler = xmlFindCharEncodingHandler("ISO-8859-6");
1586
0
      if (handler != NULL) return(handler);
1587
0
      break;
1588
0
        case XML_CHAR_ENCODING_8859_7:
1589
0
      handler = xmlFindCharEncodingHandler("ISO-8859-7");
1590
0
      if (handler != NULL) return(handler);
1591
0
      break;
1592
0
        case XML_CHAR_ENCODING_8859_8:
1593
0
      handler = xmlFindCharEncodingHandler("ISO-8859-8");
1594
0
      if (handler != NULL) return(handler);
1595
0
      break;
1596
0
        case XML_CHAR_ENCODING_8859_9:
1597
0
      handler = xmlFindCharEncodingHandler("ISO-8859-9");
1598
0
      if (handler != NULL) return(handler);
1599
0
      break;
1600
1601
1602
0
        case XML_CHAR_ENCODING_2022_JP:
1603
0
            handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1604
0
            if (handler != NULL) return(handler);
1605
0
      break;
1606
0
        case XML_CHAR_ENCODING_SHIFT_JIS:
1607
0
            handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1608
0
            if (handler != NULL) return(handler);
1609
0
            handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1610
0
            if (handler != NULL) return(handler);
1611
0
            handler = xmlFindCharEncodingHandler("Shift_JIS");
1612
0
            if (handler != NULL) return(handler);
1613
0
      break;
1614
0
        case XML_CHAR_ENCODING_EUC_JP:
1615
0
            handler = xmlFindCharEncodingHandler("EUC-JP");
1616
0
            if (handler != NULL) return(handler);
1617
0
      break;
1618
0
  default:
1619
0
      break;
1620
0
    }
1621
1622
#ifdef DEBUG_ENCODING
1623
    xmlGenericError(xmlGenericErrorContext,
1624
      "No handler found for encoding %d\n", enc);
1625
#endif
1626
0
    return(NULL);
1627
0
}
1628
1629
/**
1630
 * xmlFindCharEncodingHandler:
1631
 * @name:  a string describing the char encoding.
1632
 *
1633
 * Search in the registered set the handler able to read/write that encoding.
1634
 *
1635
 * Returns the handler or NULL if not found
1636
 */
1637
xmlCharEncodingHandlerPtr
1638
0
xmlFindCharEncodingHandler(const char *name) {
1639
0
    const char *nalias;
1640
0
    const char *norig;
1641
0
    xmlCharEncoding alias;
1642
0
#ifdef LIBXML_ICONV_ENABLED
1643
0
    xmlCharEncodingHandlerPtr enc;
1644
0
    iconv_t icv_in, icv_out;
1645
0
#endif /* LIBXML_ICONV_ENABLED */
1646
#ifdef LIBXML_ICU_ENABLED
1647
    xmlCharEncodingHandlerPtr encu;
1648
    uconv_t *ucv_in, *ucv_out;
1649
#endif /* LIBXML_ICU_ENABLED */
1650
0
    char upper[100];
1651
0
    int i;
1652
1653
0
    if (handlers == NULL) xmlInitCharEncodingHandlers();
1654
0
    if (name == NULL) return(xmlDefaultCharEncodingHandler);
1655
0
    if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1656
1657
    /*
1658
     * Do the alias resolution
1659
     */
1660
0
    norig = name;
1661
0
    nalias = xmlGetEncodingAlias(name);
1662
0
    if (nalias != NULL)
1663
0
  name = nalias;
1664
1665
    /*
1666
     * Check first for directly registered encoding names
1667
     */
1668
0
    for (i = 0;i < 99;i++) {
1669
0
        upper[i] = toupper(name[i]);
1670
0
  if (upper[i] == 0) break;
1671
0
    }
1672
0
    upper[i] = 0;
1673
1674
0
    if (handlers != NULL) {
1675
0
        for (i = 0;i < nbCharEncodingHandler; i++) {
1676
0
            if (!strcmp(upper, handlers[i]->name)) {
1677
#ifdef DEBUG_ENCODING
1678
                xmlGenericError(xmlGenericErrorContext,
1679
                        "Found registered handler for encoding %s\n", name);
1680
#endif
1681
0
                return(handlers[i]);
1682
0
            }
1683
0
        }
1684
0
    }
1685
1686
0
#ifdef LIBXML_ICONV_ENABLED
1687
    /* check whether iconv can handle this */
1688
0
    icv_in = iconv_open("UTF-8", name);
1689
0
    icv_out = iconv_open(name, "UTF-8");
1690
0
    if (icv_in == (iconv_t) -1) {
1691
0
        icv_in = iconv_open("UTF-8", upper);
1692
0
    }
1693
0
    if (icv_out == (iconv_t) -1) {
1694
0
  icv_out = iconv_open(upper, "UTF-8");
1695
0
    }
1696
0
    if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1697
0
      enc = (xmlCharEncodingHandlerPtr)
1698
0
            xmlMalloc(sizeof(xmlCharEncodingHandler));
1699
0
      if (enc == NULL) {
1700
0
          iconv_close(icv_in);
1701
0
          iconv_close(icv_out);
1702
0
    return(NULL);
1703
0
      }
1704
0
            memset(enc, 0, sizeof(xmlCharEncodingHandler));
1705
0
      enc->name = xmlMemStrdup(name);
1706
0
      enc->input = NULL;
1707
0
      enc->output = NULL;
1708
0
      enc->iconv_in = icv_in;
1709
0
      enc->iconv_out = icv_out;
1710
#ifdef DEBUG_ENCODING
1711
            xmlGenericError(xmlGenericErrorContext,
1712
        "Found iconv handler for encoding %s\n", name);
1713
#endif
1714
0
      return enc;
1715
0
    } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1716
0
      xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1717
0
        "iconv : problems with filters for '%s'\n", name);
1718
0
    }
1719
0
#endif /* LIBXML_ICONV_ENABLED */
1720
#ifdef LIBXML_ICU_ENABLED
1721
    /* check whether icu can handle this */
1722
    ucv_in = openIcuConverter(name, 1);
1723
    ucv_out = openIcuConverter(name, 0);
1724
    if (ucv_in != NULL && ucv_out != NULL) {
1725
      encu = (xmlCharEncodingHandlerPtr)
1726
             xmlMalloc(sizeof(xmlCharEncodingHandler));
1727
      if (encu == NULL) {
1728
                closeIcuConverter(ucv_in);
1729
                closeIcuConverter(ucv_out);
1730
    return(NULL);
1731
      }
1732
            memset(encu, 0, sizeof(xmlCharEncodingHandler));
1733
      encu->name = xmlMemStrdup(name);
1734
      encu->input = NULL;
1735
      encu->output = NULL;
1736
      encu->uconv_in = ucv_in;
1737
      encu->uconv_out = ucv_out;
1738
#ifdef DEBUG_ENCODING
1739
            xmlGenericError(xmlGenericErrorContext,
1740
        "Found ICU converter handler for encoding %s\n", name);
1741
#endif
1742
      return encu;
1743
    } else if (ucv_in != NULL || ucv_out != NULL) {
1744
            closeIcuConverter(ucv_in);
1745
            closeIcuConverter(ucv_out);
1746
      xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1747
        "ICU converter : problems with filters for '%s'\n", name);
1748
    }
1749
#endif /* LIBXML_ICU_ENABLED */
1750
1751
#ifdef DEBUG_ENCODING
1752
    xmlGenericError(xmlGenericErrorContext,
1753
      "No handler found for encoding %s\n", name);
1754
#endif
1755
1756
    /*
1757
     * Fallback using the canonical names
1758
     */
1759
0
    alias = xmlParseCharEncoding(norig);
1760
0
    if (alias != XML_CHAR_ENCODING_ERROR) {
1761
0
        const char* canon;
1762
0
        canon = xmlGetCharEncodingName(alias);
1763
0
        if ((canon != NULL) && (strcmp(name, canon))) {
1764
0
      return(xmlFindCharEncodingHandler(canon));
1765
0
        }
1766
0
    }
1767
1768
    /* If "none of the above", give up */
1769
0
    return(NULL);
1770
0
}
1771
1772
/************************************************************************
1773
 *                  *
1774
 *    ICONV based generic conversion functions    *
1775
 *                  *
1776
 ************************************************************************/
1777
1778
#ifdef LIBXML_ICONV_ENABLED
1779
/**
1780
 * xmlIconvWrapper:
1781
 * @cd:   iconv converter data structure
1782
 * @out:  a pointer to an array of bytes to store the result
1783
 * @outlen:  the length of @out
1784
 * @in:  a pointer to an array of ISO Latin 1 chars
1785
 * @inlen:  the length of @in
1786
 *
1787
 * Returns 0 if success, or
1788
 *     -1 by lack of space, or
1789
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
1790
 *        the result of transformation can't fit into the encoding we want), or
1791
 *     -3 if there the last byte can't form a single output char.
1792
 *
1793
 * The value of @inlen after return is the number of octets consumed
1794
 *     as the return value is positive, else unpredictable.
1795
 * The value of @outlen after return is the number of ocetes consumed.
1796
 */
1797
static int
1798
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
1799
0
                const unsigned char *in, int *inlen) {
1800
0
    size_t icv_inlen, icv_outlen;
1801
0
    const char *icv_in = (const char *) in;
1802
0
    char *icv_out = (char *) out;
1803
0
    int ret;
1804
1805
0
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1806
0
        if (outlen != NULL) *outlen = 0;
1807
0
        return(-1);
1808
0
    }
1809
0
    icv_inlen = *inlen;
1810
0
    icv_outlen = *outlen;
1811
0
    ret = iconv(cd, (ICONV_CONST char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
1812
0
    *inlen -= icv_inlen;
1813
0
    *outlen -= icv_outlen;
1814
0
    if ((icv_inlen != 0) || (ret == -1)) {
1815
0
#ifdef EILSEQ
1816
0
        if (errno == EILSEQ) {
1817
0
            return -2;
1818
0
        } else
1819
0
#endif
1820
0
#ifdef E2BIG
1821
0
        if (errno == E2BIG) {
1822
0
            return -1;
1823
0
        } else
1824
0
#endif
1825
0
#ifdef EINVAL
1826
0
        if (errno == EINVAL) {
1827
0
            return -3;
1828
0
        } else
1829
0
#endif
1830
0
        {
1831
0
            return -3;
1832
0
        }
1833
0
    }
1834
0
    return 0;
1835
0
}
1836
#endif /* LIBXML_ICONV_ENABLED */
1837
1838
/************************************************************************
1839
 *                  *
1840
 *    ICU based generic conversion functions    *
1841
 *                  *
1842
 ************************************************************************/
1843
1844
#ifdef LIBXML_ICU_ENABLED
1845
/**
1846
 * xmlUconvWrapper:
1847
 * @cd: ICU uconverter data structure
1848
 * @toUnicode : non-zero if toUnicode. 0 otherwise.
1849
 * @out:  a pointer to an array of bytes to store the result
1850
 * @outlen:  the length of @out
1851
 * @in:  a pointer to an array of ISO Latin 1 chars
1852
 * @inlen:  the length of @in
1853
 *
1854
 * Returns 0 if success, or
1855
 *     -1 by lack of space, or
1856
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
1857
 *        the result of transformation can't fit into the encoding we want), or
1858
 *     -3 if there the last byte can't form a single output char.
1859
 *
1860
 * The value of @inlen after return is the number of octets consumed
1861
 *     as the return value is positive, else unpredictable.
1862
 * The value of @outlen after return is the number of ocetes consumed.
1863
 */
1864
static int
1865
xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
1866
                const unsigned char *in, int *inlen) {
1867
    const char *ucv_in = (const char *) in;
1868
    char *ucv_out = (char *) out;
1869
    UErrorCode err = U_ZERO_ERROR;
1870
1871
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1872
        if (outlen != NULL) *outlen = 0;
1873
        return(-1);
1874
    }
1875
1876
    /*
1877
     * TODO(jungshik)
1878
     * 1. is ucnv_convert(To|From)Algorithmic better?
1879
     * 2. had we better use an explicit pivot buffer?
1880
     * 3. error returned comes from 'fromUnicode' only even
1881
     *    when toUnicode is true !
1882
     */
1883
    if (toUnicode) {
1884
        /* encoding => UTF-16 => UTF-8 */
1885
        ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
1886
                       &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
1887
                       0, TRUE, &err);
1888
    } else {
1889
        /* UTF-8 => UTF-16 => encoding */
1890
        ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
1891
                       &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
1892
                       0, TRUE, &err);
1893
    }
1894
    *inlen = ucv_in - (const char*) in;
1895
    *outlen = ucv_out - (char *) out;
1896
    if (U_SUCCESS(err))
1897
        return 0;
1898
    if (err == U_BUFFER_OVERFLOW_ERROR)
1899
        return -1;
1900
    if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
1901
        return -2;
1902
    /* if (err == U_TRUNCATED_CHAR_FOUND) */
1903
    return -3;
1904
}
1905
#endif /* LIBXML_ICU_ENABLED */
1906
1907
/************************************************************************
1908
 *                  *
1909
 *    The real API used by libxml for on-the-fly conversion *
1910
 *                  *
1911
 ************************************************************************/
1912
1913
static int
1914
xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
1915
0
                 int *outlen, const unsigned char *in, int *inlen) {
1916
0
    int ret;
1917
1918
0
    if (handler->input != NULL) {
1919
0
        ret = handler->input(out, outlen, in, inlen);
1920
0
    }
1921
0
#ifdef LIBXML_ICONV_ENABLED
1922
0
    else if (handler->iconv_in != NULL) {
1923
0
        ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
1924
0
    }
1925
0
#endif /* LIBXML_ICONV_ENABLED */
1926
#ifdef LIBXML_ICU_ENABLED
1927
    else if (handler->uconv_in != NULL) {
1928
        ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen);
1929
    }
1930
#endif /* LIBXML_ICU_ENABLED */
1931
0
    else {
1932
0
        *outlen = 0;
1933
0
        *inlen = 0;
1934
0
        ret = -2;
1935
0
    }
1936
1937
0
    return(ret);
1938
0
}
1939
1940
/* Returns -4 if no output function was found. */
1941
static int
1942
xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
1943
0
                  int *outlen, const unsigned char *in, int *inlen) {
1944
0
    int ret;
1945
1946
0
    if (handler->output != NULL) {
1947
0
        ret = handler->output(out, outlen, in, inlen);
1948
0
    }
1949
0
#ifdef LIBXML_ICONV_ENABLED
1950
0
    else if (handler->iconv_out != NULL) {
1951
0
        ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
1952
0
    }
1953
0
#endif /* LIBXML_ICONV_ENABLED */
1954
#ifdef LIBXML_ICU_ENABLED
1955
    else if (handler->uconv_out != NULL) {
1956
        ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen);
1957
    }
1958
#endif /* LIBXML_ICU_ENABLED */
1959
0
    else {
1960
0
        *outlen = 0;
1961
0
        *inlen = 0;
1962
0
        ret = -4;
1963
0
    }
1964
1965
0
    return(ret);
1966
0
}
1967
1968
/**
1969
 * xmlCharEncFirstLineInt:
1970
 * @handler:  char enconding transformation data structure
1971
 * @out:  an xmlBuffer for the output.
1972
 * @in:  an xmlBuffer for the input
1973
 * @len:  number of bytes to convert for the first line, or -1
1974
 *
1975
 * Front-end for the encoding handler input function, but handle only
1976
 * the very first line, i.e. limit itself to 45 chars.
1977
 *
1978
 * Returns the number of byte written if success, or
1979
 *     -1 general error
1980
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
1981
 *        the result of transformation can't fit into the encoding we want), or
1982
 */
1983
int
1984
xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1985
0
                       xmlBufferPtr in, int len) {
1986
0
    int ret;
1987
0
    int written;
1988
0
    int toconv;
1989
1990
0
    if (handler == NULL) return(-1);
1991
0
    if (out == NULL) return(-1);
1992
0
    if (in == NULL) return(-1);
1993
1994
    /* calculate space available */
1995
0
    written = out->size - out->use - 1; /* count '\0' */
1996
0
    toconv = in->use;
1997
    /*
1998
     * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
1999
     * 45 chars should be sufficient to reach the end of the encoding
2000
     * declaration without going too far inside the document content.
2001
     * on UTF-16 this means 90bytes, on UCS4 this means 180
2002
     * The actual value depending on guessed encoding is passed as @len
2003
     * if provided
2004
     */
2005
0
    if (len >= 0) {
2006
0
        if (toconv > len)
2007
0
            toconv = len;
2008
0
    } else {
2009
0
        if (toconv > 180)
2010
0
            toconv = 180;
2011
0
    }
2012
0
    if (toconv * 2 >= written) {
2013
0
        xmlBufferGrow(out, toconv * 2);
2014
0
  written = out->size - out->use - 1;
2015
0
    }
2016
2017
0
    ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2018
0
                           in->content, &toconv);
2019
0
    xmlBufferShrink(in, toconv);
2020
0
    out->use += written;
2021
0
    out->content[out->use] = 0;
2022
0
    if (ret == -1) ret = -3;
2023
2024
#ifdef DEBUG_ENCODING
2025
    switch (ret) {
2026
        case 0:
2027
      xmlGenericError(xmlGenericErrorContext,
2028
        "converted %d bytes to %d bytes of input\n",
2029
              toconv, written);
2030
      break;
2031
        case -1:
2032
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2033
              toconv, written, in->use);
2034
      break;
2035
        case -2:
2036
      xmlGenericError(xmlGenericErrorContext,
2037
        "input conversion failed due to input error\n");
2038
      break;
2039
        case -3:
2040
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2041
              toconv, written, in->use);
2042
      break;
2043
  default:
2044
      xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2045
    }
2046
#endif /* DEBUG_ENCODING */
2047
    /*
2048
     * Ignore when input buffer is not on a boundary
2049
     */
2050
0
    if (ret == -3) ret = 0;
2051
0
    if (ret == -1) ret = 0;
2052
0
    return(ret);
2053
0
}
2054
2055
/**
2056
 * xmlCharEncFirstLine:
2057
 * @handler:  char enconding transformation data structure
2058
 * @out:  an xmlBuffer for the output.
2059
 * @in:  an xmlBuffer for the input
2060
 *
2061
 * Front-end for the encoding handler input function, but handle only
2062
 * the very first line, i.e. limit itself to 45 chars.
2063
 *
2064
 * Returns the number of byte written if success, or
2065
 *     -1 general error
2066
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2067
 *        the result of transformation can't fit into the encoding we want), or
2068
 */
2069
int
2070
xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2071
0
                 xmlBufferPtr in) {
2072
0
    return(xmlCharEncFirstLineInt(handler, out, in, -1));
2073
0
}
2074
2075
/**
2076
 * xmlCharEncFirstLineInput:
2077
 * @input: a parser input buffer
2078
 * @len:  number of bytes to convert for the first line, or -1
2079
 *
2080
 * Front-end for the encoding handler input function, but handle only
2081
 * the very first line. Point is that this is based on autodetection
2082
 * of the encoding and once that first line is converted we may find
2083
 * out that a different decoder is needed to process the input.
2084
 *
2085
 * Returns the number of byte written if success, or
2086
 *     -1 general error
2087
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2088
 *        the result of transformation can't fit into the encoding we want), or
2089
 */
2090
int
2091
xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
2092
0
{
2093
0
    int ret;
2094
0
    size_t written;
2095
0
    size_t toconv;
2096
0
    int c_in;
2097
0
    int c_out;
2098
0
    xmlBufPtr in;
2099
0
    xmlBufPtr out;
2100
2101
0
    if ((input == NULL) || (input->encoder == NULL) ||
2102
0
        (input->buffer == NULL) || (input->raw == NULL))
2103
0
        return (-1);
2104
0
    out = input->buffer;
2105
0
    in = input->raw;
2106
2107
0
    toconv = xmlBufUse(in);
2108
0
    if (toconv == 0)
2109
0
        return (0);
2110
0
    written = xmlBufAvail(out) - 1; /* count '\0' */
2111
    /*
2112
     * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2113
     * 45 chars should be sufficient to reach the end of the encoding
2114
     * declaration without going too far inside the document content.
2115
     * on UTF-16 this means 90bytes, on UCS4 this means 180
2116
     * The actual value depending on guessed encoding is passed as @len
2117
     * if provided
2118
     */
2119
0
    if (len >= 0) {
2120
0
        if (toconv > (unsigned int) len)
2121
0
            toconv = len;
2122
0
    } else {
2123
0
        if (toconv > 180)
2124
0
            toconv = 180;
2125
0
    }
2126
0
    if (toconv * 2 >= written) {
2127
0
        xmlBufGrow(out, toconv * 2);
2128
0
        written = xmlBufAvail(out) - 1;
2129
0
    }
2130
0
    if (written > 360)
2131
0
        written = 360;
2132
2133
0
    c_in = toconv;
2134
0
    c_out = written;
2135
0
    ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2136
0
                           xmlBufContent(in), &c_in);
2137
0
    xmlBufShrink(in, c_in);
2138
0
    xmlBufAddLen(out, c_out);
2139
0
    if (ret == -1)
2140
0
        ret = -3;
2141
2142
0
    switch (ret) {
2143
0
        case 0:
2144
#ifdef DEBUG_ENCODING
2145
            xmlGenericError(xmlGenericErrorContext,
2146
                            "converted %d bytes to %d bytes of input\n",
2147
                            c_in, c_out);
2148
#endif
2149
0
            break;
2150
0
        case -1:
2151
#ifdef DEBUG_ENCODING
2152
            xmlGenericError(xmlGenericErrorContext,
2153
                         "converted %d bytes to %d bytes of input, %d left\n",
2154
                            c_in, c_out, (int)xmlBufUse(in));
2155
#endif
2156
0
            break;
2157
0
        case -3:
2158
#ifdef DEBUG_ENCODING
2159
            xmlGenericError(xmlGenericErrorContext,
2160
                        "converted %d bytes to %d bytes of input, %d left\n",
2161
                            c_in, c_out, (int)xmlBufUse(in));
2162
#endif
2163
0
            break;
2164
0
        case -2: {
2165
0
            char buf[50];
2166
0
            const xmlChar *content = xmlBufContent(in);
2167
2168
0
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2169
0
         content[0], content[1],
2170
0
         content[2], content[3]);
2171
0
      buf[49] = 0;
2172
0
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2173
0
        "input conversion failed due to input error, bytes %s\n",
2174
0
               buf);
2175
0
        }
2176
0
    }
2177
    /*
2178
     * Ignore when input buffer is not on a boundary
2179
     */
2180
0
    if (ret == -3) ret = 0;
2181
0
    if (ret == -1) ret = 0;
2182
0
    return(ret);
2183
0
}
2184
2185
/**
2186
 * xmlCharEncInput:
2187
 * @input: a parser input buffer
2188
 * @flush: try to flush all the raw buffer
2189
 *
2190
 * Generic front-end for the encoding handler on parser input
2191
 *
2192
 * Returns the number of byte written if success, or
2193
 *     -1 general error
2194
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2195
 *        the result of transformation can't fit into the encoding we want), or
2196
 */
2197
int
2198
xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
2199
0
{
2200
0
    int ret;
2201
0
    size_t written;
2202
0
    size_t toconv;
2203
0
    int c_in;
2204
0
    int c_out;
2205
0
    xmlBufPtr in;
2206
0
    xmlBufPtr out;
2207
2208
0
    if ((input == NULL) || (input->encoder == NULL) ||
2209
0
        (input->buffer == NULL) || (input->raw == NULL))
2210
0
        return (-1);
2211
0
    out = input->buffer;
2212
0
    in = input->raw;
2213
2214
0
    toconv = xmlBufUse(in);
2215
0
    if (toconv == 0)
2216
0
        return (0);
2217
0
    if ((toconv > 64 * 1024) && (flush == 0))
2218
0
        toconv = 64 * 1024;
2219
0
    written = xmlBufAvail(out);
2220
0
    if (written > 0)
2221
0
        written--; /* count '\0' */
2222
0
    if (toconv * 2 >= written) {
2223
0
        xmlBufGrow(out, toconv * 2);
2224
0
        written = xmlBufAvail(out);
2225
0
        if (written > 0)
2226
0
            written--; /* count '\0' */
2227
0
    }
2228
0
    if ((written > 128 * 1024) && (flush == 0))
2229
0
        written = 128 * 1024;
2230
2231
0
    c_in = toconv;
2232
0
    c_out = written;
2233
0
    ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2234
0
                           xmlBufContent(in), &c_in);
2235
0
    xmlBufShrink(in, c_in);
2236
0
    xmlBufAddLen(out, c_out);
2237
0
    if (ret == -1)
2238
0
        ret = -3;
2239
2240
0
    switch (ret) {
2241
0
        case 0:
2242
#ifdef DEBUG_ENCODING
2243
            xmlGenericError(xmlGenericErrorContext,
2244
                            "converted %d bytes to %d bytes of input\n",
2245
                            c_in, c_out);
2246
#endif
2247
0
            break;
2248
0
        case -1:
2249
#ifdef DEBUG_ENCODING
2250
            xmlGenericError(xmlGenericErrorContext,
2251
                         "converted %d bytes to %d bytes of input, %d left\n",
2252
                            c_in, c_out, (int)xmlBufUse(in));
2253
#endif
2254
0
            break;
2255
0
        case -3:
2256
#ifdef DEBUG_ENCODING
2257
            xmlGenericError(xmlGenericErrorContext,
2258
                        "converted %d bytes to %d bytes of input, %d left\n",
2259
                            c_in, c_out, (int)xmlBufUse(in));
2260
#endif
2261
0
            break;
2262
0
        case -2: {
2263
0
            char buf[50];
2264
0
            const xmlChar *content = xmlBufContent(in);
2265
2266
0
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2267
0
         content[0], content[1],
2268
0
         content[2], content[3]);
2269
0
      buf[49] = 0;
2270
0
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2271
0
        "input conversion failed due to input error, bytes %s\n",
2272
0
               buf);
2273
0
        }
2274
0
    }
2275
    /*
2276
     * Ignore when input buffer is not on a boundary
2277
     */
2278
0
    if (ret == -3)
2279
0
        ret = 0;
2280
0
    return (c_out? c_out : ret);
2281
0
}
2282
2283
/**
2284
 * xmlCharEncInFunc:
2285
 * @handler:  char encoding transformation data structure
2286
 * @out:  an xmlBuffer for the output.
2287
 * @in:  an xmlBuffer for the input
2288
 *
2289
 * Generic front-end for the encoding handler input function
2290
 *
2291
 * Returns the number of byte written if success, or
2292
 *     -1 general error
2293
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2294
 *        the result of transformation can't fit into the encoding we want), or
2295
 */
2296
int
2297
xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2298
                 xmlBufferPtr in)
2299
0
{
2300
0
    int ret;
2301
0
    int written;
2302
0
    int toconv;
2303
2304
0
    if (handler == NULL)
2305
0
        return (-1);
2306
0
    if (out == NULL)
2307
0
        return (-1);
2308
0
    if (in == NULL)
2309
0
        return (-1);
2310
2311
0
    toconv = in->use;
2312
0
    if (toconv == 0)
2313
0
        return (0);
2314
0
    written = out->size - out->use -1; /* count '\0' */
2315
0
    if (toconv * 2 >= written) {
2316
0
        xmlBufferGrow(out, out->size + toconv * 2);
2317
0
        written = out->size - out->use - 1;
2318
0
    }
2319
0
    ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2320
0
                           in->content, &toconv);
2321
0
    xmlBufferShrink(in, toconv);
2322
0
    out->use += written;
2323
0
    out->content[out->use] = 0;
2324
0
    if (ret == -1)
2325
0
        ret = -3;
2326
2327
0
    switch (ret) {
2328
0
        case 0:
2329
#ifdef DEBUG_ENCODING
2330
            xmlGenericError(xmlGenericErrorContext,
2331
                            "converted %d bytes to %d bytes of input\n",
2332
                            toconv, written);
2333
#endif
2334
0
            break;
2335
0
        case -1:
2336
#ifdef DEBUG_ENCODING
2337
            xmlGenericError(xmlGenericErrorContext,
2338
                         "converted %d bytes to %d bytes of input, %d left\n",
2339
                            toconv, written, in->use);
2340
#endif
2341
0
            break;
2342
0
        case -3:
2343
#ifdef DEBUG_ENCODING
2344
            xmlGenericError(xmlGenericErrorContext,
2345
                        "converted %d bytes to %d bytes of input, %d left\n",
2346
                            toconv, written, in->use);
2347
#endif
2348
0
            break;
2349
0
        case -2: {
2350
0
            char buf[50];
2351
2352
0
      snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2353
0
         in->content[0], in->content[1],
2354
0
         in->content[2], in->content[3]);
2355
0
      buf[49] = 0;
2356
0
      xmlEncodingErr(XML_I18N_CONV_FAILED,
2357
0
        "input conversion failed due to input error, bytes %s\n",
2358
0
               buf);
2359
0
        }
2360
0
    }
2361
    /*
2362
     * Ignore when input buffer is not on a boundary
2363
     */
2364
0
    if (ret == -3)
2365
0
        ret = 0;
2366
0
    return (written? written : ret);
2367
0
}
2368
2369
#ifdef LIBXML_OUTPUT_ENABLED
2370
/**
2371
 * xmlCharEncOutput:
2372
 * @output: a parser output buffer
2373
 * @init: is this an initialization call without data
2374
 *
2375
 * Generic front-end for the encoding handler on parser output
2376
 * a first call with @init == 1 has to be made first to initiate the
2377
 * output in case of non-stateless encoding needing to initiate their
2378
 * state or the output (like the BOM in UTF16).
2379
 * In case of UTF8 sequence conversion errors for the given encoder,
2380
 * the content will be automatically remapped to a CharRef sequence.
2381
 *
2382
 * Returns the number of byte written if success, or
2383
 *     -1 general error
2384
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2385
 *        the result of transformation can't fit into the encoding we want), or
2386
 */
2387
int
2388
xmlCharEncOutput(xmlOutputBufferPtr output, int init)
2389
0
{
2390
0
    int ret;
2391
0
    size_t written;
2392
0
    size_t writtentot = 0;
2393
0
    size_t toconv;
2394
0
    int c_in;
2395
0
    int c_out;
2396
0
    xmlBufPtr in;
2397
0
    xmlBufPtr out;
2398
2399
0
    if ((output == NULL) || (output->encoder == NULL) ||
2400
0
        (output->buffer == NULL) || (output->conv == NULL))
2401
0
        return (-1);
2402
0
    out = output->conv;
2403
0
    in = output->buffer;
2404
2405
0
retry:
2406
2407
0
    written = xmlBufAvail(out);
2408
0
    if (written > 0)
2409
0
        written--; /* count '\0' */
2410
2411
    /*
2412
     * First specific handling of the initialization call
2413
     */
2414
0
    if (init) {
2415
0
        c_in = 0;
2416
0
        c_out = written;
2417
        /* TODO: Check return value. */
2418
0
        xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2419
0
                          NULL, &c_in);
2420
0
        xmlBufAddLen(out, c_out);
2421
#ifdef DEBUG_ENCODING
2422
  xmlGenericError(xmlGenericErrorContext,
2423
    "initialized encoder\n");
2424
#endif
2425
0
        return(0);
2426
0
    }
2427
2428
    /*
2429
     * Conversion itself.
2430
     */
2431
0
    toconv = xmlBufUse(in);
2432
0
    if (toconv == 0)
2433
0
        return (0);
2434
0
    if (toconv > 64 * 1024)
2435
0
        toconv = 64 * 1024;
2436
0
    if (toconv * 4 >= written) {
2437
0
        xmlBufGrow(out, toconv * 4);
2438
0
        written = xmlBufAvail(out) - 1;
2439
0
    }
2440
0
    if (written > 256 * 1024)
2441
0
        written = 256 * 1024;
2442
2443
0
    c_in = toconv;
2444
0
    c_out = written;
2445
0
    ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2446
0
                            xmlBufContent(in), &c_in);
2447
0
    xmlBufShrink(in, c_in);
2448
0
    xmlBufAddLen(out, c_out);
2449
0
    writtentot += c_out;
2450
0
    if (ret == -1) {
2451
0
        if (c_out > 0) {
2452
            /* Can be a limitation of iconv or uconv */
2453
0
            goto retry;
2454
0
        }
2455
0
        ret = -3;
2456
0
    }
2457
2458
0
    if (ret >= 0) output += ret;
2459
2460
    /*
2461
     * Attempt to handle error cases
2462
     */
2463
0
    switch (ret) {
2464
0
        case 0:
2465
#ifdef DEBUG_ENCODING
2466
      xmlGenericError(xmlGenericErrorContext,
2467
        "converted %d bytes to %d bytes of output\n",
2468
              c_in, c_out);
2469
#endif
2470
0
      break;
2471
0
        case -1:
2472
#ifdef DEBUG_ENCODING
2473
      xmlGenericError(xmlGenericErrorContext,
2474
        "output conversion failed by lack of space\n");
2475
#endif
2476
0
      break;
2477
0
        case -3:
2478
#ifdef DEBUG_ENCODING
2479
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2480
              c_in, c_out, (int) xmlBufUse(in));
2481
#endif
2482
0
      break;
2483
0
        case -4:
2484
0
            xmlEncodingErr(XML_I18N_NO_OUTPUT,
2485
0
                           "xmlCharEncOutFunc: no output function !\n", NULL);
2486
0
            ret = -1;
2487
0
            break;
2488
0
        case -2: {
2489
0
      xmlChar charref[20];
2490
0
      int len = (int) xmlBufUse(in);
2491
0
            xmlChar *content = xmlBufContent(in);
2492
0
      int cur, charrefLen;
2493
2494
0
      cur = xmlGetUTF8Char(content, &len);
2495
0
      if (cur <= 0)
2496
0
                break;
2497
2498
#ifdef DEBUG_ENCODING
2499
            xmlGenericError(xmlGenericErrorContext,
2500
                    "handling output conversion error\n");
2501
            xmlGenericError(xmlGenericErrorContext,
2502
                    "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2503
                    content[0], content[1],
2504
                    content[2], content[3]);
2505
#endif
2506
            /*
2507
             * Removes the UTF8 sequence, and replace it by a charref
2508
             * and continue the transcoding phase, hoping the error
2509
             * did not mangle the encoder state.
2510
             */
2511
0
            charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2512
0
                             "&#%d;", cur);
2513
0
            xmlBufShrink(in, len);
2514
0
            xmlBufGrow(out, charrefLen * 4);
2515
0
            c_out = xmlBufAvail(out) - 1;
2516
0
            c_in = charrefLen;
2517
0
            ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2518
0
                                    charref, &c_in);
2519
2520
0
      if ((ret < 0) || (c_in != charrefLen)) {
2521
0
    char buf[50];
2522
2523
0
    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2524
0
       content[0], content[1],
2525
0
       content[2], content[3]);
2526
0
    buf[49] = 0;
2527
0
    xmlEncodingErr(XML_I18N_CONV_FAILED,
2528
0
        "output conversion failed due to conv error, bytes %s\n",
2529
0
             buf);
2530
0
    if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE)
2531
0
        content[0] = ' ';
2532
0
                break;
2533
0
      }
2534
2535
0
            xmlBufAddLen(out, c_out);
2536
0
            writtentot += c_out;
2537
0
            goto retry;
2538
0
  }
2539
0
    }
2540
0
    return(ret);
2541
0
}
2542
#endif
2543
2544
/**
2545
 * xmlCharEncOutFunc:
2546
 * @handler:  char enconding transformation data structure
2547
 * @out:  an xmlBuffer for the output.
2548
 * @in:  an xmlBuffer for the input
2549
 *
2550
 * Generic front-end for the encoding handler output function
2551
 * a first call with @in == NULL has to be made firs to initiate the
2552
 * output in case of non-stateless encoding needing to initiate their
2553
 * state or the output (like the BOM in UTF16).
2554
 * In case of UTF8 sequence conversion errors for the given encoder,
2555
 * the content will be automatically remapped to a CharRef sequence.
2556
 *
2557
 * Returns the number of byte written if success, or
2558
 *     -1 general error
2559
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2560
 *        the result of transformation can't fit into the encoding we want), or
2561
 */
2562
int
2563
xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2564
0
                  xmlBufferPtr in) {
2565
0
    int ret;
2566
0
    int written;
2567
0
    int writtentot = 0;
2568
0
    int toconv;
2569
0
    int output = 0;
2570
2571
0
    if (handler == NULL) return(-1);
2572
0
    if (out == NULL) return(-1);
2573
2574
0
retry:
2575
2576
0
    written = out->size - out->use;
2577
2578
0
    if (written > 0)
2579
0
  written--; /* Gennady: count '/0' */
2580
2581
    /*
2582
     * First specific handling of in = NULL, i.e. the initialization call
2583
     */
2584
0
    if (in == NULL) {
2585
0
        toconv = 0;
2586
        /* TODO: Check return value. */
2587
0
        xmlEncOutputChunk(handler, &out->content[out->use], &written,
2588
0
                          NULL, &toconv);
2589
0
        out->use += written;
2590
0
        out->content[out->use] = 0;
2591
#ifdef DEBUG_ENCODING
2592
  xmlGenericError(xmlGenericErrorContext,
2593
    "initialized encoder\n");
2594
#endif
2595
0
        return(0);
2596
0
    }
2597
2598
    /*
2599
     * Conversion itself.
2600
     */
2601
0
    toconv = in->use;
2602
0
    if (toconv == 0)
2603
0
  return(0);
2604
0
    if (toconv * 4 >= written) {
2605
0
        xmlBufferGrow(out, toconv * 4);
2606
0
  written = out->size - out->use - 1;
2607
0
    }
2608
0
    ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2609
0
                            in->content, &toconv);
2610
0
    xmlBufferShrink(in, toconv);
2611
0
    out->use += written;
2612
0
    writtentot += written;
2613
0
    out->content[out->use] = 0;
2614
0
    if (ret == -1) {
2615
0
        if (written > 0) {
2616
            /* Can be a limitation of iconv or uconv */
2617
0
            goto retry;
2618
0
        }
2619
0
        ret = -3;
2620
0
    }
2621
2622
0
    if (ret >= 0) output += ret;
2623
2624
    /*
2625
     * Attempt to handle error cases
2626
     */
2627
0
    switch (ret) {
2628
0
        case 0:
2629
#ifdef DEBUG_ENCODING
2630
      xmlGenericError(xmlGenericErrorContext,
2631
        "converted %d bytes to %d bytes of output\n",
2632
              toconv, written);
2633
#endif
2634
0
      break;
2635
0
        case -1:
2636
#ifdef DEBUG_ENCODING
2637
      xmlGenericError(xmlGenericErrorContext,
2638
        "output conversion failed by lack of space\n");
2639
#endif
2640
0
      break;
2641
0
        case -3:
2642
#ifdef DEBUG_ENCODING
2643
      xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2644
              toconv, written, in->use);
2645
#endif
2646
0
      break;
2647
0
        case -4:
2648
0
      xmlEncodingErr(XML_I18N_NO_OUTPUT,
2649
0
               "xmlCharEncOutFunc: no output function !\n", NULL);
2650
0
      ret = -1;
2651
0
            break;
2652
0
        case -2: {
2653
0
      xmlChar charref[20];
2654
0
      int len = in->use;
2655
0
      const xmlChar *utf = (const xmlChar *) in->content;
2656
0
      int cur, charrefLen;
2657
2658
0
      cur = xmlGetUTF8Char(utf, &len);
2659
0
      if (cur <= 0)
2660
0
                break;
2661
2662
#ifdef DEBUG_ENCODING
2663
            xmlGenericError(xmlGenericErrorContext,
2664
                    "handling output conversion error\n");
2665
            xmlGenericError(xmlGenericErrorContext,
2666
                    "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2667
                    in->content[0], in->content[1],
2668
                    in->content[2], in->content[3]);
2669
#endif
2670
            /*
2671
             * Removes the UTF8 sequence, and replace it by a charref
2672
             * and continue the transcoding phase, hoping the error
2673
             * did not mangle the encoder state.
2674
             */
2675
0
            charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2676
0
                             "&#%d;", cur);
2677
0
            xmlBufferShrink(in, len);
2678
0
            xmlBufferGrow(out, charrefLen * 4);
2679
0
      written = out->size - out->use - 1;
2680
0
            toconv = charrefLen;
2681
0
            ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2682
0
                                    charref, &toconv);
2683
2684
0
      if ((ret < 0) || (toconv != charrefLen)) {
2685
0
    char buf[50];
2686
2687
0
    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2688
0
       in->content[0], in->content[1],
2689
0
       in->content[2], in->content[3]);
2690
0
    buf[49] = 0;
2691
0
    xmlEncodingErr(XML_I18N_CONV_FAILED,
2692
0
        "output conversion failed due to conv error, bytes %s\n",
2693
0
             buf);
2694
0
    if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2695
0
        in->content[0] = ' ';
2696
0
          break;
2697
0
      }
2698
2699
0
            out->use += written;
2700
0
            writtentot += written;
2701
0
            out->content[out->use] = 0;
2702
0
            goto retry;
2703
0
  }
2704
0
    }
2705
0
    return(ret);
2706
0
}
2707
2708
/**
2709
 * xmlCharEncCloseFunc:
2710
 * @handler:  char enconding transformation data structure
2711
 *
2712
 * Generic front-end for encoding handler close function
2713
 *
2714
 * Returns 0 if success, or -1 in case of error
2715
 */
2716
int
2717
0
xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2718
0
    int ret = 0;
2719
0
    int tofree = 0;
2720
0
    int i, handler_in_list = 0;
2721
2722
0
    if (handler == NULL) return(-1);
2723
0
    if (handler->name == NULL) return(-1);
2724
0
    if (handlers != NULL) {
2725
0
        for (i = 0;i < nbCharEncodingHandler; i++) {
2726
0
            if (handler == handlers[i]) {
2727
0
          handler_in_list = 1;
2728
0
    break;
2729
0
      }
2730
0
  }
2731
0
    }
2732
0
#ifdef LIBXML_ICONV_ENABLED
2733
    /*
2734
     * Iconv handlers can be used only once, free the whole block.
2735
     * and the associated icon resources.
2736
     */
2737
0
    if ((handler_in_list == 0) &&
2738
0
        ((handler->iconv_out != NULL) || (handler->iconv_in != NULL))) {
2739
0
        tofree = 1;
2740
0
  if (handler->iconv_out != NULL) {
2741
0
      if (iconv_close(handler->iconv_out))
2742
0
    ret = -1;
2743
0
      handler->iconv_out = NULL;
2744
0
  }
2745
0
  if (handler->iconv_in != NULL) {
2746
0
      if (iconv_close(handler->iconv_in))
2747
0
    ret = -1;
2748
0
      handler->iconv_in = NULL;
2749
0
  }
2750
0
    }
2751
0
#endif /* LIBXML_ICONV_ENABLED */
2752
#ifdef LIBXML_ICU_ENABLED
2753
    if ((handler_in_list == 0) &&
2754
        ((handler->uconv_out != NULL) || (handler->uconv_in != NULL))) {
2755
        tofree = 1;
2756
  if (handler->uconv_out != NULL) {
2757
      closeIcuConverter(handler->uconv_out);
2758
      handler->uconv_out = NULL;
2759
  }
2760
  if (handler->uconv_in != NULL) {
2761
      closeIcuConverter(handler->uconv_in);
2762
      handler->uconv_in = NULL;
2763
  }
2764
    }
2765
#endif
2766
0
    if (tofree) {
2767
        /* free up only dynamic handlers iconv/uconv */
2768
0
        if (handler->name != NULL)
2769
0
            xmlFree(handler->name);
2770
0
        handler->name = NULL;
2771
0
        xmlFree(handler);
2772
0
    }
2773
#ifdef DEBUG_ENCODING
2774
    if (ret)
2775
        xmlGenericError(xmlGenericErrorContext,
2776
    "failed to close the encoding handler\n");
2777
    else
2778
        xmlGenericError(xmlGenericErrorContext,
2779
    "closed the encoding handler\n");
2780
#endif
2781
2782
0
    return(ret);
2783
0
}
2784
2785
/**
2786
 * xmlByteConsumed:
2787
 * @ctxt: an XML parser context
2788
 *
2789
 * This function provides the current index of the parser relative
2790
 * to the start of the current entity. This function is computed in
2791
 * bytes from the beginning starting at zero and finishing at the
2792
 * size in byte of the file if parsing a file. The function is
2793
 * of constant cost if the input is UTF-8 but can be costly if run
2794
 * on non-UTF-8 input.
2795
 *
2796
 * Returns the index in bytes from the beginning of the entity or -1
2797
 *         in case the index could not be computed.
2798
 */
2799
long
2800
0
xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2801
0
    xmlParserInputPtr in;
2802
2803
0
    if (ctxt == NULL) return(-1);
2804
0
    in = ctxt->input;
2805
0
    if (in == NULL)  return(-1);
2806
0
    if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2807
0
        unsigned int unused = 0;
2808
0
  xmlCharEncodingHandler * handler = in->buf->encoder;
2809
        /*
2810
   * Encoding conversion, compute the number of unused original
2811
   * bytes from the input not consumed and substract that from
2812
   * the raw consumed value, this is not a cheap operation
2813
   */
2814
0
        if (in->end - in->cur > 0) {
2815
0
      unsigned char convbuf[32000];
2816
0
      const unsigned char *cur = (const unsigned char *)in->cur;
2817
0
      int toconv = in->end - in->cur, written = 32000;
2818
2819
0
      int ret;
2820
2821
0
            do {
2822
0
                toconv = in->end - cur;
2823
0
                written = 32000;
2824
0
                ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2825
0
                                        cur, &toconv);
2826
0
                if (ret < 0) {
2827
0
                    if (written > 0)
2828
0
                        ret = -2;
2829
0
                    else
2830
0
                        return(-1);
2831
0
                }
2832
0
                unused += written;
2833
0
                cur += toconv;
2834
0
            } while (ret == -2);
2835
0
  }
2836
0
  if (in->buf->rawconsumed < unused)
2837
0
      return(-1);
2838
0
  return(in->buf->rawconsumed - unused);
2839
0
    }
2840
0
    return(in->consumed + (in->cur - in->base));
2841
0
}
2842
2843
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2844
#ifdef LIBXML_ISO8859X_ENABLED
2845
2846
/**
2847
 * UTF8ToISO8859x:
2848
 * @out:  a pointer to an array of bytes to store the result
2849
 * @outlen:  the length of @out
2850
 * @in:  a pointer to an array of UTF-8 chars
2851
 * @inlen:  the length of @in
2852
 * @xlattable: the 2-level transcoding table
2853
 *
2854
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
2855
 * block of chars out.
2856
 *
2857
 * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
2858
 * The value of @inlen after return is the number of octets consumed
2859
 *     as the return value is positive, else unpredictable.
2860
 * The value of @outlen after return is the number of ocetes consumed.
2861
 */
2862
static int
2863
UTF8ToISO8859x(unsigned char* out, int *outlen,
2864
              const unsigned char* in, int *inlen,
2865
              unsigned char const *xlattable) {
2866
    const unsigned char* outstart = out;
2867
    const unsigned char* inend;
2868
    const unsigned char* instart = in;
2869
    const unsigned char* processed = in;
2870
2871
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
2872
        (xlattable == NULL))
2873
  return(-1);
2874
    if (in == NULL) {
2875
        /*
2876
        * initialization nothing to do
2877
        */
2878
        *outlen = 0;
2879
        *inlen = 0;
2880
        return(0);
2881
    }
2882
    inend = in + (*inlen);
2883
    while (in < inend) {
2884
        unsigned char d = *in++;
2885
        if  (d < 0x80)  {
2886
            *out++ = d;
2887
        } else if (d < 0xC0) {
2888
            /* trailing byte in leading position */
2889
            *outlen = out - outstart;
2890
            *inlen = processed - instart;
2891
            return(-2);
2892
        } else if (d < 0xE0) {
2893
            unsigned char c;
2894
            if (!(in < inend)) {
2895
                /* trailing byte not in input buffer */
2896
                *outlen = out - outstart;
2897
                *inlen = processed - instart;
2898
                return(-3);
2899
            }
2900
            c = *in++;
2901
            if ((c & 0xC0) != 0x80) {
2902
                /* not a trailing byte */
2903
                *outlen = out - outstart;
2904
                *inlen = processed - instart;
2905
                return(-2);
2906
            }
2907
            c = c & 0x3F;
2908
            d = d & 0x1F;
2909
            d = xlattable [48 + c + xlattable [d] * 64];
2910
            if (d == 0) {
2911
                /* not in character set */
2912
                *outlen = out - outstart;
2913
                *inlen = processed - instart;
2914
                return(-2);
2915
            }
2916
            *out++ = d;
2917
        } else if (d < 0xF0) {
2918
            unsigned char c1;
2919
            unsigned char c2;
2920
            if (!(in < inend - 1)) {
2921
                /* trailing bytes not in input buffer */
2922
                *outlen = out - outstart;
2923
                *inlen = processed - instart;
2924
                return(-3);
2925
            }
2926
            c1 = *in++;
2927
            if ((c1 & 0xC0) != 0x80) {
2928
                /* not a trailing byte (c1) */
2929
                *outlen = out - outstart;
2930
                *inlen = processed - instart;
2931
                return(-2);
2932
            }
2933
            c2 = *in++;
2934
            if ((c2 & 0xC0) != 0x80) {
2935
                /* not a trailing byte (c2) */
2936
                *outlen = out - outstart;
2937
                *inlen = processed - instart;
2938
                return(-2);
2939
            }
2940
            c1 = c1 & 0x3F;
2941
            c2 = c2 & 0x3F;
2942
      d = d & 0x0F;
2943
      d = xlattable [48 + c2 + xlattable [48 + c1 +
2944
      xlattable [32 + d] * 64] * 64];
2945
            if (d == 0) {
2946
                /* not in character set */
2947
                *outlen = out - outstart;
2948
                *inlen = processed - instart;
2949
                return(-2);
2950
            }
2951
            *out++ = d;
2952
        } else {
2953
            /* cannot transcode >= U+010000 */
2954
            *outlen = out - outstart;
2955
            *inlen = processed - instart;
2956
            return(-2);
2957
        }
2958
        processed = in;
2959
    }
2960
    *outlen = out - outstart;
2961
    *inlen = processed - instart;
2962
    return(*outlen);
2963
}
2964
2965
/**
2966
 * ISO8859xToUTF8
2967
 * @out:  a pointer to an array of bytes to store the result
2968
 * @outlen:  the length of @out
2969
 * @in:  a pointer to an array of ISO Latin 1 chars
2970
 * @inlen:  the length of @in
2971
 *
2972
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
2973
 * block of chars out.
2974
 * Returns 0 if success, or -1 otherwise
2975
 * The value of @inlen after return is the number of octets consumed
2976
 * The value of @outlen after return is the number of ocetes produced.
2977
 */
2978
static int
2979
ISO8859xToUTF8(unsigned char* out, int *outlen,
2980
              const unsigned char* in, int *inlen,
2981
              unsigned short const *unicodetable) {
2982
    unsigned char* outstart = out;
2983
    unsigned char* outend;
2984
    const unsigned char* instart = in;
2985
    const unsigned char* inend;
2986
    const unsigned char* instop;
2987
    unsigned int c;
2988
2989
    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
2990
        (in == NULL) || (unicodetable == NULL))
2991
  return(-1);
2992
    outend = out + *outlen;
2993
    inend = in + *inlen;
2994
    instop = inend;
2995
2996
    while ((in < inend) && (out < outend - 2)) {
2997
        if (*in >= 0x80) {
2998
            c = unicodetable [*in - 0x80];
2999
            if (c == 0) {
3000
                /* undefined code point */
3001
                *outlen = out - outstart;
3002
                *inlen = in - instart;
3003
                return (-1);
3004
            }
3005
            if (c < 0x800) {
3006
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
3007
                *out++ = (c & 0x3F) | 0x80;
3008
            } else {
3009
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
3010
                *out++ = ((c >>  6) & 0x3F) | 0x80;
3011
                *out++ = (c & 0x3F) | 0x80;
3012
            }
3013
            ++in;
3014
        }
3015
        if (instop - in > outend - out) instop = in + (outend - out);
3016
        while ((*in < 0x80) && (in < instop)) {
3017
            *out++ = *in++;
3018
        }
3019
    }
3020
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
3021
        *out++ =  *in++;
3022
    }
3023
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
3024
        *out++ =  *in++;
3025
    }
3026
    *outlen = out - outstart;
3027
    *inlen = in - instart;
3028
    return (*outlen);
3029
}
3030
3031
3032
/************************************************************************
3033
 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
3034
 ************************************************************************/
3035
3036
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
3037
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3038
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3039
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3040
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3041
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
3042
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
3043
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
3044
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
3045
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
3046
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
3047
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
3048
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
3049
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
3050
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
3051
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
3052
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
3053
};
3054
3055
static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
3056
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3057
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3058
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3059
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3060
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3061
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3062
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3063
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3064
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3065
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3066
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3067
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
3068
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
3069
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3070
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
3071
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3072
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
3073
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3074
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3075
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
3076
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
3077
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
3078
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
3079
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3080
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
3081
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
3082
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
3083
};
3084
3085
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
3086
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3087
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3088
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3089
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3090
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
3091
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
3092
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
3093
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
3094
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
3095
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3096
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
3097
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
3098
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
3099
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3100
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
3101
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
3102
};
3103
3104
static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
3105
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3106
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3107
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3108
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3109
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3110
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3111
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3112
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3113
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3114
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3115
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
3116
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
3117
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
3118
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
3119
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3120
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3121
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
3122
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3123
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3124
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3125
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3126
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3127
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3128
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3129
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
3130
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
3131
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
3132
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3133
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3134
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3135
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
3136
};
3137
3138
static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
3139
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3140
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3141
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3142
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3143
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
3144
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
3145
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
3146
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
3147
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3148
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
3149
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3150
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
3151
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3152
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
3153
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3154
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
3155
};
3156
3157
static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
3158
    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
3159
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3160
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3161
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3162
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3163
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3164
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3165
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3166
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3167
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
3168
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3169
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3170
    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3171
    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
3172
    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
3173
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
3174
    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
3175
    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
3176
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
3177
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3178
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
3179
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3180
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3181
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3182
    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
3183
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
3184
    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
3185
};
3186
3187
static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
3188
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3189
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3190
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3191
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3192
    0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
3193
    0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
3194
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
3195
    0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
3196
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
3197
    0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
3198
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
3199
    0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
3200
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
3201
    0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
3202
    0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
3203
    0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
3204
};
3205
3206
static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
3207
    "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3208
    "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3209
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3210
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3211
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3212
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3213
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3214
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3215
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3216
    "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
3217
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3218
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
3219
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3220
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3221
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3222
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3223
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
3224
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3225
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3226
    "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3227
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3228
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3229
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3230
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3231
    "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3232
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3233
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3234
};
3235
3236
static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
3237
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3238
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3239
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3240
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3241
    0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
3242
    0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
3243
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3244
    0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
3245
    0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
3246
    0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3247
    0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3248
    0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3249
    0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3250
    0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3251
    0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3252
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3253
};
3254
3255
static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
3256
    "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3257
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3258
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3259
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3260
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3261
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3262
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3263
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3264
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3265
    "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3266
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3267
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3268
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3269
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3270
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3271
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3272
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3273
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3274
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3275
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3276
    "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3277
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3278
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3279
};
3280
3281
static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
3282
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3283
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3284
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3285
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3286
    0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3287
    0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3288
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3289
    0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3290
    0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3291
    0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3292
    0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3293
    0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3294
    0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3295
    0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3296
    0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3297
    0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3298
};
3299
3300
static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3301
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3302
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3303
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3304
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3305
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3306
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3307
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3308
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3309
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3310
    "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3311
    "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3312
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3313
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3314
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3315
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3316
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3317
    "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3318
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3319
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3320
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3321
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3322
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3323
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3324
    "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3325
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3326
    "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3327
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3328
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3329
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3330
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3331
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3332
};
3333
3334
static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
3335
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3336
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3337
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3338
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3339
    0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3340
    0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3341
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3342
    0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3343
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3344
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3345
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3346
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3347
    0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3348
    0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3349
    0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3350
    0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3351
};
3352
3353
static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3354
    "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3355
    "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3356
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3357
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3358
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3359
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3360
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3361
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3362
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3363
    "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3364
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3365
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3366
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3367
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3368
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3369
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3370
    "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3371
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3372
    "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3373
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3374
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3375
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3376
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3377
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3378
    "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3379
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3380
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3381
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3382
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3383
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3384
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3385
};
3386
3387
static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
3388
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3389
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3390
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3391
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3392
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3393
    0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3394
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3395
    0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3396
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3397
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3398
    0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3399
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3400
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3401
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3402
    0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3403
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3404
};
3405
3406
static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3407
    "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3408
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3409
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3410
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3411
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3412
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3413
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3414
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3415
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3416
    "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3417
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3418
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3419
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3420
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3421
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3422
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3423
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3424
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3425
    "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3426
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3427
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3428
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3429
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3430
};
3431
3432
static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3433
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3434
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3435
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3436
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3437
    0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3438
    0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3439
    0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3440
    0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3441
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3442
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3443
    0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3444
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3445
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3446
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3447
    0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3448
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3449
};
3450
3451
static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3452
    "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3453
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3454
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3455
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3456
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3457
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3458
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3459
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3460
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3461
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3462
    "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3463
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3464
    "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3465
    "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3466
    "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3467
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3468
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3469
    "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3470
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3471
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3472
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3473
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3474
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3475
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3476
    "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3477
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3478
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3479
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3480
    "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3481
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3482
    "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3483
};
3484
3485
static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3486
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3487
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3488
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3489
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3490
    0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3491
    0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3492
    0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3493
    0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3494
    0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3495
    0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3496
    0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3497
    0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3498
    0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3499
    0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3500
    0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3501
    0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3502
};
3503
3504
static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3505
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3506
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3507
    "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3508
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3509
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3510
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3511
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3512
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3513
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3514
    "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3515
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3516
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3517
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3518
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3519
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3520
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3521
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3522
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3523
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3524
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3525
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3526
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3527
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3528
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3529
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3530
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3531
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3532
};
3533
3534
static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3535
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3536
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3537
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3538
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3539
    0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3540
    0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3541
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3542
    0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3543
    0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3544
    0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3545
    0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3546
    0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3547
    0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3548
    0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3549
    0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3550
    0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3551
};
3552
3553
static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3554
    "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3555
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3556
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3557
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3558
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3559
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3560
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3561
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3562
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3563
    "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3564
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3565
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3566
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3567
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3568
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3569
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3570
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3571
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3572
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3573
    "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3574
    "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3575
    "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3576
    "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3577
    "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3578
    "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3579
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3580
    "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3581
    "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3582
    "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3583
    "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3584
    "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3585
};
3586
3587
static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
3588
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3589
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3590
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3591
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3592
    0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3593
    0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3594
    0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3595
    0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3596
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3597
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3598
    0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3599
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3600
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3601
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3602
    0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3603
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
3604
};
3605
3606
static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
3607
    "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3608
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3609
    "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3610
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3611
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3612
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3613
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3614
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3615
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3616
    "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3617
    "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3618
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3619
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3620
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3621
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3622
    "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3623
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3624
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3625
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3626
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3627
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3628
    "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3629
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3630
    "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3631
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3632
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3633
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3634
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3635
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3636
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3637
    "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3638
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3639
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3640
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3641
    "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3642
    "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3643
    "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3644
    "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3645
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3646
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3647
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3648
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3649
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3650
};
3651
3652
static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
3653
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3654
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3655
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3656
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3657
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3658
    0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3659
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3660
    0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3661
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3662
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3663
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3664
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3665
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3666
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3667
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3668
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
3669
};
3670
3671
static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
3672
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3673
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3674
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3675
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3676
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3677
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3678
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3679
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3680
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3681
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3682
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3683
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3684
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3685
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3686
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3687
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3688
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3689
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3690
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3691
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3692
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3693
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3694
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3695
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3696
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3697
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3698
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
3699
};
3700
3701
static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
3702
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3703
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3704
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3705
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3706
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3707
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3708
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3709
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3710
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3711
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3712
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3713
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3714
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3715
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3716
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3717
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
3718
};
3719
3720
static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
3721
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3722
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3723
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3724
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3725
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3726
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3727
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3728
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3729
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3730
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3731
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3732
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3733
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3734
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3735
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3736
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3737
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3738
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3739
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3740
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3741
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3742
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3743
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3744
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3745
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3746
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3747
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3748
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3749
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3750
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3751
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3752
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3753
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3754
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3755
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3756
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3757
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3758
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3759
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3760
};
3761
3762
3763
/*
3764
 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3765
 */
3766
3767
static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3768
    const unsigned char* in, int *inlen) {
3769
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3770
}
3771
static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3772
    const unsigned char* in, int *inlen) {
3773
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3774
}
3775
3776
static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3777
    const unsigned char* in, int *inlen) {
3778
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3779
}
3780
static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3781
    const unsigned char* in, int *inlen) {
3782
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3783
}
3784
3785
static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3786
    const unsigned char* in, int *inlen) {
3787
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3788
}
3789
static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3790
    const unsigned char* in, int *inlen) {
3791
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3792
}
3793
3794
static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3795
    const unsigned char* in, int *inlen) {
3796
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3797
}
3798
static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3799
    const unsigned char* in, int *inlen) {
3800
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3801
}
3802
3803
static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3804
    const unsigned char* in, int *inlen) {
3805
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3806
}
3807
static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3808
    const unsigned char* in, int *inlen) {
3809
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3810
}
3811
3812
static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3813
    const unsigned char* in, int *inlen) {
3814
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3815
}
3816
static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3817
    const unsigned char* in, int *inlen) {
3818
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3819
}
3820
3821
static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3822
    const unsigned char* in, int *inlen) {
3823
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3824
}
3825
static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3826
    const unsigned char* in, int *inlen) {
3827
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3828
}
3829
3830
static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3831
    const unsigned char* in, int *inlen) {
3832
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3833
}
3834
static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3835
    const unsigned char* in, int *inlen) {
3836
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3837
}
3838
3839
static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3840
    const unsigned char* in, int *inlen) {
3841
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3842
}
3843
static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3844
    const unsigned char* in, int *inlen) {
3845
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3846
}
3847
3848
static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3849
    const unsigned char* in, int *inlen) {
3850
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3851
}
3852
static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3853
    const unsigned char* in, int *inlen) {
3854
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3855
}
3856
3857
static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3858
    const unsigned char* in, int *inlen) {
3859
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3860
}
3861
static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3862
    const unsigned char* in, int *inlen) {
3863
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3864
}
3865
3866
static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3867
    const unsigned char* in, int *inlen) {
3868
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3869
}
3870
static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3871
    const unsigned char* in, int *inlen) {
3872
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3873
}
3874
3875
static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3876
    const unsigned char* in, int *inlen) {
3877
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3878
}
3879
static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3880
    const unsigned char* in, int *inlen) {
3881
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3882
}
3883
3884
static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3885
    const unsigned char* in, int *inlen) {
3886
    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3887
}
3888
static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3889
    const unsigned char* in, int *inlen) {
3890
    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3891
}
3892
3893
static void
3894
xmlRegisterCharEncodingHandlersISO8859x (void) {
3895
    xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3896
    xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3897
    xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3898
    xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3899
    xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3900
    xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3901
    xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3902
    xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3903
    xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3904
    xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3905
    xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3906
    xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3907
    xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3908
    xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3909
}
3910
3911
#endif
3912
#endif
3913
3914
#define bottom_encoding
3915
#include "elfgcchack.h"